lucarecord 0.2.13

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
+ # frozen_string_literal: true
+ require 'luca_record/version'
+
+ module LucaRecord
+   autoload :Base, 'luca_record/base'
+   autoload :Dict, 'luca_record/dict'
+ end
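
The entry file only registers autoloads, so LucaRecord::Base and LucaRecord::Dict are loaded lazily on first constant reference. A minimal sketch of the effect (not part of the released files):

    require 'luca_record'

    LucaRecord.autoload?(:Dict)   # => "luca_record/dict"  (registered, not yet loaded)
    LucaRecord::Dict              # first reference loads luca_record/dict
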
@@ -0,0 +1,12 @@
+ # frozen_string_literal: true
+
+ require 'luca_record/version'
+ require 'luca_record/io'
+ require 'luca_support'
+
+ module LucaRecord
+   class Base
+     include LucaRecord::IO
+     include LucaSupport::View
+   end
+ end
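
LucaRecord::Base simply bundles LucaRecord::IO and LucaSupport::View, so applications subclass it and point the class at a subdirectory of the project's data directory. A rough sketch of such a subclass (the name Journal is hypothetical; @dirname and @record_type are class-level settings inferred from how io.rb reads them):

    class Journal < LucaRecord::Base
      @dirname = 'journals'   # records live under <project>/data/journals/
      @record_type = nil      # default: records are decoded as YAML
    end

The usage sketches further down reuse this hypothetical Journal class.
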
@@ -0,0 +1,146 @@
+ # frozen_string_literal: true
+
+ require 'csv'
+ require 'fileutils'
+ require 'yaml'
+ require 'pathname'
+ require 'luca_support'
+
+ #
+ # Low level API
+ #
+ module LucaRecord
+   class Dict
+     include LucaSupport::Code
+
+     def initialize(file = @filename)
+       @path = file
+       #@path = dict_path(file)
+       set_driver
+     end
+
+     def search(word, default_word = nil)
+       res = max_score_code(word)
+       if res[1] > 0.4
+         res[0]
+       else
+         default_word
+       end
+     end
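
Dict#search scores the query against every key in the dictionary's 'definitions' section via LucaSupport.match_score and returns the best-matching definition only when the score exceeds 0.4; otherwise the fallback word is returned. A usage sketch with a hypothetical dictionary file and query:

    dict = LucaRecord::Dict.new('import-bank.yaml')
    dict.search('TOKYO ELECTRIC', 'C01')   # => mapped definition, or 'C01' if nothing scores above 0.4
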
+
+     #
+     # Column number settings for CSV/TSV convert
+     #
+     # :label
+     #   for single entry data
+     # :counter_label
+     #   must be specified with label
+     # :debit_label
+     #   for double entry data
+     #   * debit_value
+     # :credit_label
+     #   for double entry data
+     #   * credit_value
+     # :note
+     #   can be the same column as another label
+     #
+     # :encoding
+     #   file encoding
+     #
+     def csv_config
+       {}.tap do |config|
+         if @config.dig('label')
+           config[:label] = @config['label'].to_i
+           if @config.dig('counter_label')
+             config[:counter_label] = @config['counter_label']
+             config[:type] = 'single'
+           end
+         elsif @config.dig('debit_label')
+           config[:debit_label] = @config['debit_label'].to_i
+           if @config.dig('credit_label')
+             config[:credit_label] = @config['credit_label'].to_i
+             config[:type] = 'double'
+           end
+         end
+         config[:type] ||= 'invalid'
+         config[:debit_value] = @config['debit_value'].to_i if @config.dig('debit_value')
+         config[:credit_value] = @config['credit_value'].to_i if @config.dig('credit_value')
+         config[:note] = @config['note'].to_i if @config.dig('note')
+         config[:encoding] = @config['encoding'] if @config.dig('encoding')
+
+         config[:year] = @config['year'] if @config.dig('year')
+         config[:month] = @config['month'] if @config.dig('month')
+         config[:day] = @config['day'] if @config.dig('day')
+       end
+     end
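
csv_config normalizes the 'config' section that set_driver pulled out of the dictionary file into symbol keys, deriving the entry type from which labels are present. A hypothetical YAML config section and the hash it would yield:

    config:
      label: 3
      counter_label: 'Sales'
      encoding: 'Shift_JIS'
      year: 0
      month: 1
      day: 2

    # csv_config
    # => { label: 3, counter_label: 'Sales', type: 'single',
    #      encoding: 'Shift_JIS', year: 0, month: 1, day: 2 }
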
+
+     #
+     # Load CSV with config options
+     #
+     def load_csv(path)
+       CSV.read(path, headers: true, encoding: "#{@config.dig('encoding') || 'utf-8'}:utf-8").each do |row|
+         yield row
+       end
+     end
+
+     #
+     # load dictionary data
+     #
+     def self.load(file = @filename)
+       case File.extname(file)
+       when '.tsv', '.csv'
+         load_tsv_dict(dict_path(file))
+       when '.yaml', '.yml'
+         YAML.load_file(dict_path(file), **{})
+       else
+         raise 'cannot load this filetype'
+       end
+     end
+
+     #
+     # generate dictionary from TSV file. Minimum assumption is as below:
+     # the first row is a header row whose cells become each entry's symbol keys.
+     #
+     # * row[0] is 'code'. Converted hash keys
+     # * row[1] is 'label'. Should be human readable labels
+     # * row[2] and later can be app specific data
+     #
+     def self.load_tsv_dict(path)
+       {}.tap do |dict|
+         CSV.read(path, headers: true, col_sep: "\t", encoding: 'UTF-8').each do |row|
+           {}.tap do |entry|
+             row.each do |header, field|
+               next if row.index(header).zero?
+
+               entry[header.to_sym] = field unless field.nil?
+             end
+             dict[row[0]] = entry
+           end
+         end
+       end
+     end
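
load_tsv_dict keys each entry by the first column and turns the remaining header cells into symbol keys, skipping empty fields. A hypothetical tab-separated dictionary (columns aligned here for readability; the income_tax column is invented for illustration) and the hash it produces:

    code    label     income_tax
    1001    Cash
    A15     Payroll   yes

    # LucaRecord::Dict.load('accounts.tsv')
    # => { '1001' => { label: 'Cash' },
    #      'A15'  => { label: 'Payroll', income_tax: 'yes' } }
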
+
+     private
+
+     def set_driver
+       input = self.class.load(@path)
+       @config = input['config']
+       @definitions = input['definitions']
+     end
+
+     def self.dict_path(filename)
+       Pathname(LucaSupport::Config::Pjdir) / 'dict' / filename
+     end
+
+     def self.reverse(dict)
+       dict.map{ |k, v| [v[:label], k] }.to_h
+     end
+
+     def max_score_code(str)
+       res = @definitions.map do |k, v|
+         [v, LucaSupport.match_score(str, k, 3)]
+       end
+       res.max { |x, y| x[1] <=> y[1] }
+     end
+   end
+ end
@@ -0,0 +1,337 @@
+ require 'csv'
+ require 'date'
+ require 'fileutils'
+ require 'yaml'
+ require 'pathname'
+ require 'luca_support/code'
+ require 'luca_support/config'
+
+ #
+ # Low level API
+ # manipulate files based on transaction date
+ #
+ module LucaRecord
+   module IO
+     include LucaSupport::Code
+
+     def self.included(klass) # :nodoc:
+       klass.extend ClassMethods
+     end
+
+     #
+     # Uses @date to look up currently effective settings.
+     # query can be a nested hash when the target key is other than 'val'.
+     #
+     #   where(contract_status: 'active')
+     #   where(graded: {rank: 5})
+     #
+     def where(**query)
+       return enum_for(:where, **query) unless block_given?
+
+       query.each do |key, val|
+         v = val.respond_to?(:values) ? val.values.first : val
+         label = val.respond_to?(:keys) ? val.keys.first : 'val'
+         self.class.all do |data|
+           next unless data.keys.map(&:to_sym).include?(key)
+
+           processed = parse_current(data)
+           yield processed if v == processed.dig(key.to_s, label.to_s)
+         end
+       end
+     end
+
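
where walks every record of the class (via self.class.all), resolves the currently effective values with parse_current, and yields the records whose value matches. With a nested hash the inner key selects a label other than the default 'val'. A sketch using the hypothetical Journal class from above:

    Journal.new.where(contract_status: 'active') do |record|
      puts record['id']
    end

    # nested form: match records whose graded.rank is 5
    Journal.new.where(graded: { rank: 5 }).first
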
+     module ClassMethods
+       #
+       # find a record by ID. Supports uuid and encoded date.
+       #
+       def find(id, basedir = @dirname)
+         return enum_for(:find, id, basedir) unless block_given?
+
+         if id.length >= 40
+           open_hashed(basedir, id) do |f|
+             yield load_data(f)
+           end
+         elsif id.length >= 9
+           # TODO: need regexp match for more flexible coding (after AD9999)
+           open_records(basedir, id[0, 5], id[5, 6]) do |f, path|
+             yield load_data(f, path)
+           end
+         else
+           raise 'specified id length is too short'
+         end
+       end
+
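
find dispatches on ID length: 40 characters or more is treated as a hashed ID and resolved through open_hashed, while 9 or more characters is split into a year-month directory (the first 5 characters) and a filename prefix (the rest). A sketch with the hypothetical Journal class; the second call assumes issue_random_id returns a 40-character digest, as the length check suggests:

    Journal.find('2020HV001') { |record| p record }   # directory '2020H', file prefix 'V001'

    id = Journal.create('customer' => 'ACME')
    Journal.find(id) { |record| p record }             # hashed path under data/journals/
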
+       #
+       # search date based records.
+       #
+       # * data hash
+       # * data id. Array like [2020H, V001]
+       #
+       def asof(year, month = nil, day = nil, basedir = @dirname)
+         return enum_for(:search, year, month, day, nil, basedir) unless block_given?
+
+         search(year, month, day, nil, basedir) do |data, path|
+           yield data, path
+         end
+       end
+
+       #
+       # search with date params & code.
+       #
+       def search(year, month = nil, day = nil, code = nil, basedir = @dirname)
+         return enum_for(:search, year, month, day, code, basedir) unless block_given?
+
+         subdir = year.to_s + LucaSupport::Code.encode_month(month)
+         open_records(basedir, subdir, LucaSupport::Code.encode_date(day), code) do |f, path|
+           if @record_type == 'raw'
+             yield f, path
+           else
+             yield load_data(f, path), path
+           end
+         end
+       end
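
asof is search without a code filter; both build the target directory from the year plus the encoded month ('2020F' for June 2020, per the open_records comment below) and then glob the matching record files. A sketch:

    Journal.asof(2020, 6).each { |record, id| p id }        # every June 2020 record
    Journal.search(2020, 6, nil, '5001') { |record, id| }   # June 2020 records carrying code '5001'
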
+
+       #
+       # retrieve all data
+       #
+       def all(basedir = @dirname)
+         return enum_for(:all, basedir) unless block_given?
+
+         open_all(basedir) do |f|
+           yield load_data(f)
+         end
+       end
+
+       #
+       # convert ID to file path. Normal argument is as follows:
+       #
+       # * [2020H, V001]
+       # * "2020H/V001"
+       # * "a7b806d04a044c6dbc4ce72932867719"
+       #
+       def id2path(id)
+         if id.is_a?(Array)
+           id.join('/')
+         elsif id.include?('/')
+           id
+         else
+           encode_hashed_path(id)
+         end
+       end
+
+       #
+       # Directory separation for performance, the same way Git stores objects.
+       #
+       def encode_hashed_path(id, split_factor = 3)
+         len = id.length
+         if len <= split_factor
+           ['', id]
+         else
+           [id[0, split_factor], id[split_factor, len - split_factor]]
+         end
+       end
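
encode_hashed_path splits an ID into a short directory prefix and the remainder, so no single directory accumulates too many files. For example:

    encode_hashed_path('a7b806d04a044c6dbc4ce72932867719')
    # => ['a7b', '806d04a044c6dbc4ce72932867719']
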
+
+       def add_status!(id, status, basedir = @dirname)
+         path = abs_path(basedir) / id2path(id)
+         origin = YAML.load_file(path, {})
+         newline = { status => DateTime.now.to_s }
+         origin['status'] = [] if origin['status'].nil?
+         origin['status'] << newline
+         File.write(path, YAML.dump(origin.sort.to_h))
+       end
+
+       #
+       # create hash based record
+       #
+       def create(obj, basedir = @dirname)
+         id = LucaSupport::Code.issue_random_id
+         obj['id'] = id
+         open_hashed(basedir, id, 'w') do |f|
+           f.write(YAML.dump(obj.sort.to_h))
+         end
+         id
+       end
+
+       #
+       # define new transaction ID & write data at once
+       #
+       def create_record!(date_obj, codes = nil, basedir = @dirname)
+         gen_record_file!(basedir, date_obj, codes) do |f|
+           f.write CSV.generate('', col_sep: "\t", headers: false) { |c| yield(c) }
+         end
+       end
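
create writes a hash as YAML under a hashed path and returns the generated ID; create_record! opens a new date-coded file and writes the tab-separated rows pushed onto the yielded CSV object. A sketch with the hypothetical Journal class:

    id = Journal.create('customer' => 'ACME', 'amount' => 1000)

    Journal.create_record!(Date.new(2020, 6, 1), ['5001']) do |rows|
      rows << ['debit', '5001', '1000']
      rows << ['credit', '1101', '1000']
    end
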
+
+       def gen_record_file!(basedir, date_obj, codes = nil)
+         d = prepare_dir!(abs_path(basedir), date_obj)
+         filename = LucaSupport::Code.encode_date(date_obj) + new_record_id(abs_path(basedir), date_obj)
+         if codes
+           filename += codes.inject('') { |fragment, code| "#{fragment}-#{code}" }
+         end
+         path = Pathname(d) + filename
+         File.open(path.to_s, 'w') { |f| yield(f) }
+       end
+
+       def new_record_id(basedir, date_obj)
+         LucaSupport::Code.encode_txid(new_record_no(basedir, date_obj))
+       end
+
+       def prepare_dir!(basedir, date_obj)
+         dir_name = (Pathname(basedir) + encode_dirname(date_obj)).to_s
+         FileUtils.mkdir_p(dir_name) unless Dir.exist?(dir_name)
+         dir_name
+       end
+
+       def encode_dirname(date_obj)
+         date_obj.year.to_s + LucaSupport::Code.encode_month(date_obj)
+       end
+
+       private
+
+       #
+       # open records with 'basedir/month/date-code' path structure.
+       # Glob patterns can be specified as in the following examples.
+       #
+       # '2020': all months of 2020
+       # '2020[FG]': June & July of 2020
+       #
+       # Block will receive code fragments as its 2nd parameter. The array format is as below:
+       # 1. encoded month
+       # 2. encoded day + record number of the day
+       # 3. codes. The third and later elements are the code set, excluding the first two fragments.
+       #
+       def open_records(basedir, subdir, filename = nil, code = nil, mode = 'r')
+         return enum_for(:open_records, basedir, subdir, filename, code, mode) unless block_given?
+
+         file_pattern = filename.nil? ? '*' : "#{filename}*"
+         Dir.chdir(abs_path(basedir)) do
+           Dir.glob("#{subdir}*/#{file_pattern}").sort.each do |subpath|
+             next if skip_on_unmatch_code(subpath, code)
+
+             id_set = subpath.split('/').map { |str| str.split('-') }.flatten
+             File.open(subpath, mode) { |f| yield(f, id_set) }
+           end
+         end
+       end
+
+       #
+       # git object like structure
+       #
+       def open_hashed(basedir, id, mode = 'r')
+         return enum_for(:open_hashed, basedir, id, mode) unless block_given?
+
+         subdir, filename = encode_hashed_path(id)
+         dirpath = Pathname(abs_path(basedir)) + subdir
+         FileUtils.mkdir_p(dirpath.to_s) if mode != 'r'
+         File.open((dirpath + filename).to_s, mode) { |f| yield f }
+       end
+
+       #
+       # scan through all files
+       #
+       def open_all(basedir, mode = 'r')
+         return enum_for(:open_all, basedir, mode) unless block_given?
+
+         dirpath = Pathname(abs_path(basedir)) / '*' / '*'
+         Dir.glob(dirpath.to_s).each do |filename|
+           File.open(filename, mode) { |f| yield f }
+         end
+       end
+
+       #
+       # Decode basic format.
+       # If specific decode is needed, override this method in each class.
+       #
+       def load_data(io, path = nil)
+         case @record_type
+         when 'raw'
+           # TODO: raw may be unneeded in favor of override
+           io
+         when 'json'
+           # TODO: implement JSON parse
+         else
+           YAML.load(io.read)
+         end
+       end
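
load_data is the per-class decoding hook: the default branch parses YAML, a @record_type of 'raw' hands the open file object straight through, and the JSON branch is still a TODO. A sketch of a raw-mode class (the name is hypothetical):

    class Attachment < LucaRecord::Base
      @dirname = 'attachments'
      @record_type = 'raw'   # find/search now yield the IO instead of parsed YAML
    end
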
+
+       # TODO: replace with data_dir method
+       def abs_path(base_dir)
+         Pathname(LucaSupport::Config::Pjdir) / 'data' / base_dir
+       end
+
+       # true when the file has no record for the code
+       # false when it may have one
+       def skip_on_unmatch_code(subpath, code = nil)
+         # p filename.split('-')[1..-1]
+         filename = subpath.split('/').last
+         return false if code.nil? || filename.length <= 4
+
+         !filename.split('-')[1..-1].include?(code)
+       end
+
+       # AUTO INCREMENT
+       def new_record_no(basedir, date_obj)
+         dir_name = (Pathname(basedir) + encode_dirname(date_obj)).to_s
+         raise 'No target dir exists.' unless Dir.exist?(dir_name)
+
+         Dir.chdir(dir_name) do
+           last_file = Dir.glob("#{LucaSupport::Code.encode_date(date_obj)}*").max
+           return 1 if last_file.nil?
+
+           return LucaSupport::Code.decode_txid(last_file[1, 3]) + 1
+         end
+       end
+     end # end of ClassMethods
+
+     def set_data_dir(dir_path = LucaSupport::Config::Pjdir)
+       if dir_path.nil?
+         raise 'No project path is specified'
+       elsif !valid_project?(dir_path)
+         raise 'Specified path is not for valid project'
+       else
+         project_dir = Pathname(dir_path)
+       end
+
+       (project_dir + 'data/').to_s
+     end
+
+     def valid_project?(path)
+       project_dir = Pathname(path)
+       FileTest.file?((project_dir + 'config.yml').to_s) and FileTest.directory?((project_dir + 'data').to_s)
+     end
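
valid_project? and abs_path imply a project rooted at LucaSupport::Config::Pjdir containing config.yml and a data/ tree, while LucaRecord::Dict reads from dict/. A sketch of that layout (directory names under data/ are the basedir values passed to the IO helpers; the concrete names are hypothetical):

    <project>/
      config.yml
      dict/
        import-bank.yaml
      data/
        journals/
          2020F/    # year + encoded month: date-coded records
          a7b/      # first three characters of hashed ids
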
+
+     #
+     # for date based records
+     #
+     def scan_terms(base_dir, query = nil)
+       pattern = query.nil? ? "*" : "#{query}*"
+       Dir.chdir(base_dir) do
+         Dir.glob(pattern).select { |dir|
+           FileTest.directory?(dir) && /^[0-9]/.match(dir)
+         }.sort.map { |str| decode_term(str) }
+       end
+     end
+
+     def load_config(path = nil)
+       path = path.to_s
+       if File.exist?(path)
+         YAML.load_file(path, **{})
+       else
+         {}
+       end
+     end
+
+     def has_status?(dat, status)
+       return false if dat['status'].nil?
+
+       dat['status'].map { |h| h.key?(status) }
+                    .include?(true)
+     end
+
+     def load_tsv(path)
+       return enum_for(:load_tsv, path) unless block_given?
+
+       data = CSV.read(path, headers: true, col_sep: "\t", encoding: 'UTF-8')
+       data.each { |row| yield row }
+     end
+   end
+ end