lucarecord 0.2.13

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+ require 'luca_record/version'
3
+
4
# Top-level namespace of the gem.
#
# Base and Dict are registered with autoload, so 'luca_record/base' and
# 'luca_record/dict' are only required when the constant is first referenced.
module LucaRecord
  autoload(:Base, 'luca_record/base')
  autoload(:Dict, 'luca_record/dict')
end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'luca_record/version'
4
+ require 'luca_record/io'
5
+ require 'luca_support'
6
+
7
module LucaRecord
  # Common parent class for records.
  #
  # It defines no behaviour of its own: everything comes from the two
  # mixins. LucaRecord::IO also extends the including class with its
  # ClassMethods (see IO.included).
  class Base
    include LucaRecord::IO
    include LucaSupport::View
  end
end
@@ -0,0 +1,146 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+ require 'fileutils'
5
+ require 'yaml'
6
+ require 'pathname'
7
+ require 'luca_support'
8
+
9
+ #
10
+ # Low level API
11
+ #
12
+ module LucaRecord
13
+ class Dict
14
+ include LucaSupport::Code
15
+
16
# Remembers the dictionary file and loads its contents immediately.
#
# file:: dictionary file name, later handed to Dict.load by set_driver.
#        NOTE(review): the default reads @filename on the freshly created
#        instance, which is nil unless something assigned it beforehand -
#        confirm callers always pass a file.
def initialize(file = @filename)
  @path = file
  set_driver
end
21
+
22
# Finds the definition whose key best matches +word+.
#
# word::         text compared against the dictionary keys by max_score_code.
# default_word:: value returned when no candidate scores well enough.
#
# Returns the best candidate when its score is greater than 0.4, otherwise
# +default_word+. When max_score_code has no candidate at all (empty
# definitions make it return nil) +default_word+ is returned instead of
# failing on nil.
def search(word, default_word = nil)
  best, score = max_score_code(word)
  return default_word if score.nil?

  score > 0.4 ? best : default_word
end
30
+
31
+ #
32
+ # Column number settings for CSV/TSV convert
33
+ #
34
+ # :label
35
+ # for double entry data
36
+ # :counter_label
37
+ # must be specified with label
38
+ # :debit_label
39
+ # for double entry data
40
+ # * debit_value
41
+ # :credit_label
42
+ # for double entry data
43
+ # * credit_value
44
+ # :note
45
+ # can be the same column as another label
46
+ #
47
+ # :encoding
48
+ # file encoding
49
+ #
50
# Builds a symbol-keyed settings hash for CSV/TSV conversion from the
# string-keyed @config.
#
# * 'label' (+ 'counter_label')      -> :type 'single'
# * 'debit_label' (+ 'credit_label') -> :type 'double'
# * anything else                    -> :type 'invalid'
#
# Column settings are converted with to_i; 'counter_label', 'encoding',
# 'year', 'month' and 'day' are copied unchanged. Keys missing (or falsy)
# in @config are left out of the result.
def csv_config
  settings = {}

  if @config.dig('label')
    settings[:label] = @config['label'].to_i
    if @config.dig('counter_label')
      settings[:counter_label] = @config['counter_label']
      settings[:type] = 'single'
    end
  elsif @config.dig('debit_label')
    settings[:debit_label] = @config['debit_label'].to_i
    if @config.dig('credit_label')
      settings[:credit_label] = @config['credit_label'].to_i
      settings[:type] = 'double'
    end
  end
  settings[:type] ||= 'invalid'

  # Integer settings
  %w[debit_value credit_value note].each do |name|
    settings[name.to_sym] = @config[name].to_i if @config.dig(name)
  end

  # Settings passed through as they are
  %w[encoding year month day].each do |name|
    settings[name.to_sym] = @config[name] if @config.dig(name)
  end

  settings
end
76
+
77
+ #
78
+ # Load CSV with config options
79
+ #
80
# Reads the CSV file at +path+ (first line treated as headers) and yields
# each row to the block.
#
# The file is read with the encoding named in @config['encoding']
# (utf-8 when unset) and transcoded to utf-8.
#
# Returns an Enumerator when no block is given, like load_tsv and the
# other iterators in this library; previously the missing block only
# surfaced as a LocalJumpError after the whole file had been read.
def load_csv(path)
  return enum_for(:load_csv, path) unless block_given?

  source_encoding = @config.dig('encoding') || 'utf-8'
  CSV.read(path, headers: true, encoding: "#{source_encoding}:utf-8").each do |row|
    yield row
  end
end
85
+
86
+ #
87
+ # load dictionary data
88
+ #
89
# Loads dictionary data from +file+, resolved through dict_path.
#
# The loader is picked by file extension:
# * .tsv / .csv   -> load_tsv_dict
# * .yaml / .yml  -> YAML.load_file
#
# Raises RuntimeError for any other extension.
def self.load(file = @filename)
  extension = File.extname(file)

  if %w[.tsv .csv].include?(extension)
    load_tsv_dict(dict_path(file))
  elsif %w[.yaml .yml].include?(extension)
    YAML.load_file(dict_path(file), **{})
  else
    raise 'cannot load this filetype'
  end
end
99
+
100
+ #
101
+ # generate dictionary from TSV file. Minimum assumption is as follows:
102
+ # 1st row is converted symbol.
103
+ #
104
+ # * row[0] is 'code'. Converted hash keys
105
+ # * row[1] is 'label'. Should be human readable labels
106
+ # * after row[2] can be app specific data
107
+ #
108
# Converts a tab separated file (first line = headers) into a hash.
#
# The first column of each row becomes the key. The value is a hash of
# the remaining columns, keyed by the header converted to a symbol;
# empty (nil) fields are left out.
def self.load_tsv_dict(path)
  table = CSV.read(path, headers: true, col_sep: "\t", encoding: 'UTF-8')

  table.each_with_object({}) do |row, dict|
    entry = {}
    row.each do |header, field|
      # Skip the key column (and any column sharing its header).
      next if row.index(header).zero?
      next if field.nil?

      entry[header.to_sym] = field
    end
    dict[row[0]] = entry
  end
end
122
+
123
+ private
124
+
125
# Loads the file at @path through the class-level loader and keeps its
# 'config' and 'definitions' sections in @config and @definitions.
# Returns the definitions.
def set_driver
  loaded = self.class.load(@path)
  @config, @definitions = %w[config definitions].map { |section| loaded[section] }
  @definitions
end
130
+
131
# Returns the Pathname of +filename+ inside the project's 'dict' directory
# (LucaSupport::Config::Pjdir / 'dict').
def self.dict_path(filename)
  dict_dir = Pathname(LucaSupport::Config::Pjdir) + 'dict'
  dict_dir + filename
end
134
+
135
# Inverts a dictionary: returns a hash mapping each entry's :label to its
# key. When two entries share a label, the later one wins.
def self.reverse(dict)
  dict.each_with_object({}) do |(code, entry), by_label|
    by_label[entry[:label]] = code
  end
end
138
+
139
# Scores every key of @definitions against +str+ with
# LucaSupport.match_score(str, key, 3) and returns the best candidate as
# a [definition, score] pair, or nil when there are no definitions.
def max_score_code(str)
  candidates = @definitions.map do |key, definition|
    [definition, LucaSupport.match_score(str, key, 3)]
  end
  candidates.max_by(&:last)
end
145
+ end
146
+ end
@@ -0,0 +1,337 @@
1
+ require 'csv'
2
+ require 'date'
3
+ require 'fileutils'
4
+ require 'yaml'
5
+ require 'pathname'
6
+ require 'luca_support/code'
7
+ require 'luca_support/config'
8
+
9
+ #
10
+ # Low level API
11
+ # manipulate files based on transaction date
12
+ #
13
+ module LucaRecord
14
+ module IO
15
+ include LucaSupport::Code
16
+
17
# Hook run when the module is included: adds ClassMethods to the
# including class as class-level methods.
def self.included(klass) # :nodoc:
  klass.extend(ClassMethods)
end
20
+
21
+ #
22
+ # Used @date for searching current settings
23
+ # query can be nested hash for other than 'val'
24
+ #
25
+ # where(contract_status: 'active')
26
+ # where(graded: {rank: 5})
27
+ #
28
# Yields every record whose current value matches the query.
#
# Each query entry is either +key: value+ (compared against the record's
# 'val' field under that key) or +key: { field => value }+ (compared
# against that field; only the first pair of the hash is used).
#
#   where(contract_status: 'active')
#   where(graded: {rank: 5})
#
# Records come from the class-level +all+; each one is passed through
# parse_current before comparison and it is the processed record that is
# yielded. Returns an Enumerator when no block is given.
def where(**query)
  return enum_for(:where, **query) unless block_given?

  query.each do |attribute, condition|
    expected = condition.respond_to?(:values) ? condition.values.first : condition
    field = (condition.respond_to?(:keys) ? condition.keys.first : 'val').to_s

    self.class.all do |record|
      # Only records that have the attribute at all are worth processing.
      next unless record.keys.map(&:to_sym).include?(attribute)

      current = parse_current(record)
      yield current if expected == current.dig(attribute.to_s, field)
    end
  end
end
42
+
43
+ module ClassMethods
44
+ #
45
+ # find ID based record. Support uuid and encoded date.
46
+ #
47
# Yields the record(s) stored under +id+.
#
# * 40 characters or more: treated as a hashed id, opened via open_hashed.
# * 9 to 39 characters: treated as a date based id; the first 5 characters
#   select the sub directory and the following characters the file prefix
#   passed to open_records.
#
# Raises RuntimeError for shorter ids. Returns an Enumerator when no
# block is given.
def find(id, basedir = @dirname)
  return enum_for(:find, id, basedir) unless block_given?

  raise 'specified id length is too short' if id.length < 9

  if id.length >= 40
    open_hashed(basedir, id) { |f| yield load_data(f) }
  else
    # TODO: need regexp match for more flexible coding(after AD9999)
    open_records(basedir, id[0, 5], id[5, 6]) { |f, path| yield load_data(f, path) }
  end
end
63
+
64
+ #
65
+ # search date based record.
66
+ #
67
+ # * data hash
68
+ # * data id. Array like [2020H, V001]
69
+ #
70
# Date based lookup without a code filter: delegates to +search+ with
# code = nil and yields whatever it yields (data, then the id fragments).
#
# Returns an Enumerator over +search+ when no block is given.
def asof(year, month = nil, day = nil, basedir = @dirname)
  return enum_for(:search, year, month, day, nil, basedir) unless block_given?

  search(year, month, day, nil, basedir) { |data, path| yield data, path }
end
77
+
78
+ #
79
+ # search with date params & code.
80
+ #
81
# Yields date based records, optionally restricted to +code+.
#
# The sub directory is the year followed by the encoded month, the file
# prefix is the encoded day (both encoded by LucaSupport::Code).
# For each file opened by open_records the block receives:
#
# 1. the open file itself when @record_type is 'raw', otherwise the
#    result of load_data
# 2. the id fragments supplied by open_records
#
# Returns an Enumerator when no block is given.
def search(year, month = nil, day = nil, code = nil, basedir = @dirname)
  return enum_for(:search, year, month, day, code, basedir) unless block_given?

  subdir = year.to_s + LucaSupport::Code.encode_month(month)
  day_prefix = LucaSupport::Code.encode_date(day)

  open_records(basedir, subdir, day_prefix, code) do |f, path|
    record = @record_type == 'raw' ? f : load_data(f, path)
    yield record, path
  end
end
93
+
94
+ #
95
+ # retrieve all data
96
+ #
97
# Yields the decoded content (load_data) of every file found by open_all
# under +basedir+. Returns an Enumerator when no block is given.
def all(basedir = @dirname)
  return enum_for(:all, basedir) unless block_given?

  open_all(basedir) { |f| yield load_data(f) }
end
104
+
105
+ #
106
+ # convert ID to file path. Normal argument is as follows:
107
+ #
108
+ # * [2020H, V001]
109
+ # * "2020H/V001"
110
+ # * "a7b806d04a044c6dbc4ce72932867719"
111
+ #
112
# Converts a record id into a relative path String.
#
# * ['2020H', 'V001']  -> "2020H/V001"
# * "2020H/V001"       -> returned unchanged
# * any other String   -> split by encode_hashed_path and joined with '/'
#
# The hashed branch used to return the raw [subdir, filename] Array, which
# cannot be appended to a Pathname (add_status! does exactly that). It now
# returns a String like the other branches; an empty sub directory
# (very short ids) is dropped so the result never starts with '/'.
def id2path(id)
  return id.join('/') if id.is_a?(Array)
  return id if id.include?('/')

  encode_hashed_path(id).reject(&:empty?).join('/')
end
121
+
122
+ #
123
+ # Directory separation for performance. Same as Git way.
124
+ #
125
# Splits +id+ into [directory part, file part] after +split_factor+
# characters, so hashed records are spread over sub directories.
# Ids no longer than +split_factor+ get an empty directory part.
def encode_hashed_path(id, split_factor = 3)
  return ['', id] if id.length <= split_factor

  [id[0, split_factor], id[split_factor..-1]]
end
133
+
134
# Appends a status entry to the YAML record identified by +id+.
#
# id::      record id, converted to a path by id2path.
# status::  status name; stored as { status => current timestamp }.
# basedir:: data sub directory, resolved through abs_path.
#
# The record's 'status' list is created when missing, and the file is
# rewritten with its top-level keys sorted.
def add_status!(id, status, basedir = @dirname)
  path = abs_path(basedir) / id2path(id)
  # Options are splatted as keywords, as in the other YAML.load_file calls
  # of this library: a positional hash is rejected by YAML.load_file on
  # Ruby 3.
  origin = YAML.load_file(path, **{})
  origin['status'] ||= []
  origin['status'] << { status => DateTime.now.to_s }
  File.write(path, YAML.dump(origin.sort.to_h))
end
142
+
143
+ #
144
+ # create hash based record
145
+ #
146
# Stores +obj+ as a new hashed record and returns the new id.
#
# A fresh id from LucaSupport::Code.issue_random_id is written into
# obj['id'] (the passed object is modified), then the object is dumped
# as YAML with sorted keys into the file opened by open_hashed.
def create(obj, basedir = @dirname)
  LucaSupport::Code.issue_random_id.tap do |new_id|
    obj['id'] = new_id
    open_hashed(basedir, new_id, 'w') { |f| f.write(YAML.dump(obj.sort.to_h)) }
  end
end
154
+
155
+ #
156
+ # define new transaction ID & write data at once
157
+ #
158
# Creates a new date based record file and fills it in one go.
#
# The caller's block receives a CSV builder (tab separated, no headers);
# whatever it appends is written to the file opened by gen_record_file!.
def create_record!(date_obj, codes = nil, basedir = @dirname)
  gen_record_file!(basedir, date_obj, codes) do |f|
    tsv = CSV.generate('', col_sep: "\t", headers: false) { |csv| yield(csv) }
    f.write tsv
  end
end
163
+
164
# Opens a new record file for writing and yields it.
#
# The file lives in the directory returned by prepare_dir!. Its name is
# the encoded date followed by new_record_id, plus "-code" for every
# entry of +codes+.
def gen_record_file!(basedir, date_obj, codes = nil)
  target_dir = prepare_dir!(abs_path(basedir), date_obj)
  name = LucaSupport::Code.encode_date(date_obj) + new_record_id(abs_path(basedir), date_obj)
  name += codes.map { |code| "-#{code}" }.join if codes

  File.open((Pathname(target_dir) + name).to_s, 'w') { |f| yield(f) }
end
173
+
174
# Returns the next record number for the date (new_record_no), encoded by
# LucaSupport::Code.encode_txid.
def new_record_id(basedir, date_obj)
  sequence = new_record_no(basedir, date_obj)
  LucaSupport::Code.encode_txid(sequence)
end
177
+
178
# Makes sure the directory for +date_obj+ (basedir + encode_dirname)
# exists and returns its path as a String.
def prepare_dir!(basedir, date_obj)
  (Pathname(basedir) + encode_dirname(date_obj)).to_s.tap do |dir_name|
    FileUtils.mkdir_p(dir_name) unless Dir.exist?(dir_name)
  end
end
183
+
184
# Directory name for a date: the year followed by the month as encoded by
# LucaSupport::Code.encode_month.
def encode_dirname(date_obj)
  year_part = date_obj.year.to_s
  month_part = LucaSupport::Code.encode_month(date_obj)
  year_part + month_part
end
187
+
188
+ private
189
+
190
+ #
191
+ # open records with 'basedir/month/date-code' path structure.
192
+ # Glob pattern can be specified like following examples.
193
+ #
194
+ # '2020': All month of 2020
195
+ # '2020[FG]': June & July of 2020
196
+ #
197
+ # Block will receive code fragments as 2nd parameter. Array format is as follows:
198
+ # 1. encoded month
199
+ # 2. encoded day + record number of the day
200
+ # 3. codes. More than 3 are all code set except first 2 parameters.
201
+ #
202
# Opens every file matching "<subdir>*/<filename>*" below abs_path(basedir),
# in sorted order, and yields the open file together with its id fragments
# (the relative path split on '/' and '-').
#
# Files rejected by skip_on_unmatch_code for +code+ are not opened.
# The working directory is changed to the data directory while iterating.
# Returns an Enumerator when no block is given.
def open_records(basedir, subdir, filename = nil, code = nil, mode = 'r')
  return enum_for(:open_records, basedir, subdir, filename, code, mode) unless block_given?

  # A nil filename interpolates to '', leaving the bare '*' wildcard.
  file_pattern = "#{filename}*"
  Dir.chdir(abs_path(basedir)) do
    Dir.glob("#{subdir}*/#{file_pattern}").sort.each do |subpath|
      next if skip_on_unmatch_code(subpath, code)

      id_set = subpath.split('/').flat_map { |part| part.split('-') }
      File.open(subpath, mode) { |f| yield(f, id_set) }
    end
  end
end
215
+
216
+ #
217
+ # git object like structure
218
+ #
219
# Opens the hashed record +id+ and yields the file.
#
# The location is abs_path(basedir) + the two parts returned by
# encode_hashed_path. For any mode other than 'r' the sub directory is
# created first. Returns an Enumerator when no block is given.
def open_hashed(basedir, id, mode = 'r')
  return enum_for(:open_hashed, basedir, id, mode) unless block_given?

  subdir, filename = encode_hashed_path(id)
  record_dir = Pathname(abs_path(basedir)) + subdir
  FileUtils.mkdir_p(record_dir.to_s) unless mode == 'r'

  File.open((record_dir + filename).to_s, mode) { |f| yield f }
end
227
+
228
+ #
229
+ # scan through all files
230
+ #
231
# Opens every entry exactly two levels below abs_path(basedir)
# (glob '*/*') and yields each open file.
# Returns an Enumerator when no block is given.
def open_all(basedir, mode = 'r')
  return enum_for(:open_all, basedir, mode) unless block_given?

  pattern = (Pathname(abs_path(basedir)) + '*' + '*').to_s
  Dir.glob(pattern).each do |filename|
    File.open(filename, mode) { |f| yield f }
  end
end
239
+
240
+ #
241
+ # Decode basic format.
242
+ # If specific decode is needed, override this method in each class.
243
+ #
244
# Decodes an opened record according to @record_type.
# Override in the including class when a specific format is needed.
#
# * 'raw'  -> the io itself, untouched
# * 'json' -> nil (TODO: implement JSON parse)
# * other  -> the io's content parsed as YAML
#
# +path+ is accepted for overriding implementations; it is not used here.
def load_data(io, path = nil)
  # TODO: raw may be unneeded in favor of override
  return io if @record_type == 'raw'
  return nil if @record_type == 'json'

  # NOTE(review): YAML.load on record files - confirm they never come from
  # an untrusted source, or switch to YAML.safe_load.
  YAML.load(io.read)
end
255
+
256
+ # TODO: replace with data_dir method
257
# Returns the Pathname of +base_dir+ inside the project's 'data' directory
# (LucaSupport::Config::Pjdir / 'data').
# TODO: replace with data_dir method
def abs_path(base_dir)
  data_root = Pathname(LucaSupport::Config::Pjdir) + 'data'
  data_root + base_dir
end
260
+
261
+ # true when file doesn't have record on code
262
+ # false when file may have one
263
# Tells whether the file at +subpath+ can be skipped when looking for
# +code+.
#
# Returns false (do not skip) when no code is requested or when the file
# name is 4 characters or shorter. Otherwise the '-' separated fragments
# after the first one are taken as the file's codes, and the result is
# true only when +code+ is not among them.
def skip_on_unmatch_code(subpath, code = nil)
  filename = subpath.split('/').last
  return false if code.nil? || filename.length <= 4

  listed_codes = filename.split('-')[1..-1]
  !listed_codes.include?(code)
end
270
+
271
+ # AUTO INCREMENT
272
# AUTO INCREMENT
# Returns the next record number for +date_obj+ inside its month directory.
#
# The highest (by string order) file name starting with the encoded date
# is looked up; characters 1..3 of that name are decoded with
# LucaSupport::Code.decode_txid and incremented. Returns 1 when the date
# has no file yet.
#
# Raises RuntimeError when the month directory does not exist.
def new_record_no(basedir, date_obj)
  dir_name = (Pathname(basedir) + encode_dirname(date_obj)).to_s
  raise 'No target dir exists.' unless Dir.exist?(dir_name)

  last_file = Dir.chdir(dir_name) do
    Dir.glob("#{LucaSupport::Code.encode_date(date_obj)}*").max
  end
  last_file.nil? ? 1 : LucaSupport::Code.decode_txid(last_file[1, 3]) + 1
end
283
+ end # end of ClassMethods
284
+
285
# Returns the 'data/' directory of the project at +dir_path+ as a String.
#
# Raises RuntimeError when +dir_path+ is nil or when valid_project?
# rejects it.
def set_data_dir(dir_path = LucaSupport::Config::Pjdir)
  raise 'No project path is specified' if dir_path.nil?
  raise 'Specified path is not for valid project' unless valid_project?(dir_path)

  (Pathname(dir_path) + 'data/').to_s
end
296
+
297
# A directory counts as a project when it contains a 'config.yml' file
# and a 'data' directory.
def valid_project?(path)
  root = Pathname(path)
  config_file = (root + 'config.yml').to_s
  data_dir = (root + 'data').to_s

  FileTest.file?(config_file) && FileTest.directory?(data_dir)
end
301
+
302
+ #
303
+ # for date based records
304
+ #
305
# for date based records
#
# Lists the sub directories of +base_dir+ whose name starts with a digit
# (and with +query+, when given), sorted by name, each converted by
# decode_term.
def scan_terms(base_dir, query = nil)
  # A nil query interpolates to '', leaving the bare '*' wildcard.
  pattern = "#{query}*"
  Dir.chdir(base_dir) do
    term_dirs = Dir.glob(pattern).select do |dir|
      FileTest.directory?(dir) && /^[0-9]/.match?(dir)
    end
    term_dirs.sort.map { |name| decode_term(name) }
  end
end
313
+
314
# Loads the YAML file at +path+.
#
# Returns an empty Hash when +path+ is nil or the file does not exist.
# Uses File.exist?: the File.exists? alias called here before is no
# longer available on Ruby 3.2 and later.
def load_config(path = nil)
  path = path.to_s
  return {} unless File.exist?(path)

  YAML.load_file(path, **{})
end
322
+
323
# True when any entry of dat['status'] has +status+ as a key.
# False when the record has no 'status' list at all.
def has_status?(dat, status)
  entries = dat['status']
  return false if entries.nil?

  entries.any? { |entry| entry.key?(status) }
end
329
+
330
# Reads the UTF-8, tab separated file at +path+ (first line = headers)
# and yields each row. Returns an Enumerator when no block is given.
def load_tsv(path)
  return enum_for(:load_tsv, path) unless block_given?

  CSV.read(path, headers: true, col_sep: "\t", encoding: 'UTF-8').each do |row|
    yield row
  end
end
336
+ end
337
+ end