ad_hoc_template 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/ad_hoc_template.rb +24 -3
- data/lib/ad_hoc_template/command_line_interface.rb +27 -11
- data/lib/ad_hoc_template/entry_format_generator.rb +49 -0
- data/lib/ad_hoc_template/parser.rb +127 -58
- data/lib/ad_hoc_template/pseudohiki_formatter.rb +1 -1
- data/lib/ad_hoc_template/record_reader.rb +294 -160
- data/lib/ad_hoc_template/version.rb +1 -1
- data/spec/ad_hoc_template_spec.rb +151 -5
- data/spec/command_line_interface_spec.rb +107 -7
- data/spec/entry_format_generator_spec.rb +163 -0
- data/spec/parser_spec.rb +114 -18
- data/spec/pseudohiki_formatter_spec.rb +2 -2
- data/spec/record_reader_spec.rb +253 -9
- metadata +6 -3
@@ -11,8 +11,8 @@ module AdHocTemplate
|
|
11
11
|
YAML.load(yaml_data)
|
12
12
|
end
|
13
13
|
|
14
|
-
def self.
|
15
|
-
data = RecordReader.
|
14
|
+
def self.dump(config_data)
|
15
|
+
data = RecordReader.parse_if_necessary(config_data)
|
16
16
|
YAML.dump(data)
|
17
17
|
end
|
18
18
|
end
|
@@ -22,13 +22,20 @@ module AdHocTemplate
|
|
22
22
|
JSON.parse(json_data)
|
23
23
|
end
|
24
24
|
|
25
|
-
def self.
|
26
|
-
data = RecordReader.
|
27
|
-
JSON.
|
25
|
+
def self.dump(config_data)
|
26
|
+
data = RecordReader.parse_if_necessary(config_data)
|
27
|
+
JSON.pretty_generate(data)
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
31
31
|
module CSVReader
|
32
|
+
COL_SEP = {
|
33
|
+
csv: CSV::DEFAULT_OPTIONS[:col_sep],
|
34
|
+
tsv: "\t"
|
35
|
+
}
|
36
|
+
|
37
|
+
class NotSupportedError < StandardError; end
|
38
|
+
|
32
39
|
def self.read_record(csv_data, config={ csv: nil })
|
33
40
|
label, sep = parse_config(config)
|
34
41
|
header, *data = CSV.new(csv_data, col_sep: sep).to_a
|
@@ -42,6 +49,19 @@ module AdHocTemplate
|
|
42
49
|
end
|
43
50
|
end
|
44
51
|
|
52
|
+
def self.dump(config_data, col_sep=COL_SEP[:csv])
|
53
|
+
data = RecordReader.parse_if_necessary(config_data)
|
54
|
+
raise NotSupportedError unless csv_compatible_format?(data)
|
55
|
+
|
56
|
+
if kv_pairs = find_sub_records(data)
|
57
|
+
records = hashes_to_arrays(kv_pairs)
|
58
|
+
else
|
59
|
+
records = data.to_a.transpose
|
60
|
+
end
|
61
|
+
|
62
|
+
array_to_csv(records, col_sep)
|
63
|
+
end
|
64
|
+
|
45
65
|
def self.convert_to_hash(header, row_array)
|
46
66
|
{}.tap do |record|
|
47
67
|
header.zip(row_array).each do |key, value|
|
@@ -60,221 +80,335 @@ module AdHocTemplate
|
|
60
80
|
when Hash
|
61
81
|
format, label = config.to_a[0]
|
62
82
|
end
|
63
|
-
|
64
|
-
return label,
|
83
|
+
col_sep = COL_SEP[format||:csv]
|
84
|
+
return label, col_sep
|
65
85
|
end
|
66
86
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
SEPARATOR = /:\s*/o
|
71
|
-
BLOCK_HEAD = /\A\/\/\/@/o
|
72
|
-
ITERATION_HEAD = /\A\/\/\/@#/o
|
73
|
-
EMPTY_LINE = /\A(?:\r?\n|\r)\Z/o
|
74
|
-
ITERATION_MARK = /\A#/o
|
75
|
-
READERS_RE = {
|
76
|
-
key_value: SEPARATOR,
|
77
|
-
iteration: ITERATION_HEAD,
|
78
|
-
block: BLOCK_HEAD,
|
79
|
-
empty_line: EMPTY_LINE,
|
80
|
-
}
|
81
|
-
|
82
|
-
class ReaderState
|
83
|
-
attr_accessor :current_block_label
|
84
|
-
|
85
|
-
def initialize(config={}, stack=[])
|
86
|
-
@stack = stack
|
87
|
-
@configs = [config]
|
88
|
-
setup_reader
|
87
|
+
def self.csv_compatible_format?(data)
|
88
|
+
iteration_blocks_count = data.values.select {|v| v.kind_of? Array }.size
|
89
|
+
iteration_blocks_count == 0 or (iteration_blocks_count == 1 && data.size == 1)
|
89
90
|
end
|
90
91
|
|
91
|
-
def
|
92
|
-
|
92
|
+
def self.hashes_to_arrays(data)
|
93
|
+
headers = data.max_by {|h| h.keys.size }.keys
|
94
|
+
records = data.map {|record| headers.map {|header| record[header] } }
|
95
|
+
records.unshift headers
|
93
96
|
end
|
94
97
|
|
95
|
-
def
|
96
|
-
|
98
|
+
def self.find_sub_records(data)
|
99
|
+
data.values.find {|v| v.kind_of? Array }
|
97
100
|
end
|
98
101
|
|
99
|
-
def
|
100
|
-
|
101
|
-
|
102
|
+
def self.array_to_csv(records, col_sep)
|
103
|
+
# I do not adopt "records.map {|rec| rec.to_csv }.join",
|
104
|
+
# because I'm not sure if it is sufficient for certain data or not.
|
105
|
+
# For example, a field value may contain carriage returns or line feeds,
|
106
|
+
# and in that case, improper handling of the end of record would be damaging.
|
102
107
|
|
103
|
-
|
104
|
-
|
108
|
+
CSV.generate('', col_sep: col_sep) do |csv|
|
109
|
+
records.each {|record| csv << record }
|
110
|
+
end
|
105
111
|
end
|
106
112
|
|
107
|
-
|
108
|
-
|
109
|
-
|
113
|
+
private_class_method :convert_to_hash, :parse_config
|
114
|
+
private_class_method :csv_compatible_format?, :hashes_to_arrays
|
115
|
+
private_class_method :find_sub_records, :array_to_csv
|
116
|
+
end
|
110
117
|
|
111
|
-
|
112
|
-
|
113
|
-
@configs.push new_record
|
114
|
-
new_record
|
115
|
-
end
|
118
|
+
module TSVReader
|
119
|
+
COL_SEP = CSVReader::COL_SEP
|
116
120
|
|
117
|
-
def
|
118
|
-
|
121
|
+
def self.read_record(tsv_data, config={ tsv: nil })
|
122
|
+
config = { tsv: config } if config.kind_of? String
|
123
|
+
CSVReader.read_record(tsv_data, config)
|
119
124
|
end
|
120
125
|
|
121
|
-
def
|
122
|
-
|
126
|
+
def self.dump(config_data, col_sep=COL_SEP[:tsv])
|
127
|
+
CSVReader.dump(config_data, col_sep)
|
123
128
|
end
|
129
|
+
end
|
124
130
|
|
125
|
-
|
126
|
-
|
127
|
-
|
131
|
+
module DefaultFormReader
|
132
|
+
SEPARATOR = /:\s*/o
|
133
|
+
BLOCK_HEAD = /\A\/\/\/@/o
|
134
|
+
ITERATION_HEAD = /\A\/\/\/@#/o
|
135
|
+
EMPTY_LINE = /\A#{LINE_END_STR}\Z/o
|
136
|
+
ITERATION_MARK = /\A#/o
|
137
|
+
READERS_RE = {
|
138
|
+
key_value: SEPARATOR,
|
139
|
+
iteration: ITERATION_HEAD,
|
140
|
+
block: BLOCK_HEAD,
|
141
|
+
empty_line: EMPTY_LINE,
|
142
|
+
}
|
143
|
+
|
144
|
+
class ReaderState
|
145
|
+
attr_accessor :current_block_label
|
146
|
+
|
147
|
+
def initialize(config={}, stack=[])
|
148
|
+
@stack = stack
|
149
|
+
@configs = [config]
|
150
|
+
setup_reader
|
151
|
+
end
|
128
152
|
|
129
|
-
|
130
|
-
|
131
|
-
lines.each do |line|
|
132
|
-
setup_stack(line)
|
133
|
-
read(line)
|
153
|
+
def push(reader)
|
154
|
+
@stack.push reader
|
134
155
|
end
|
135
|
-
remove_trailing_empty_lines_from_last_block!
|
136
|
-
parsed_record
|
137
|
-
end
|
138
156
|
|
139
|
-
|
140
|
-
|
141
|
-
|
157
|
+
def pop
|
158
|
+
@stack.pop unless @stack.length == 1
|
159
|
+
end
|
142
160
|
|
143
|
-
|
144
|
-
|
145
|
-
|
161
|
+
def setup_stack(line)
|
162
|
+
@stack[-1].setup_stack(line)
|
163
|
+
end
|
164
|
+
|
165
|
+
def current_reader
|
166
|
+
@stack[-1]
|
146
167
|
end
|
147
|
-
end
|
148
168
|
|
149
|
-
|
169
|
+
def read(line)
|
170
|
+
@stack[-1].read(line)
|
171
|
+
end
|
172
|
+
|
173
|
+
def push_new_record
|
174
|
+
new_record = {}
|
175
|
+
@configs.push new_record
|
176
|
+
new_record
|
177
|
+
end
|
178
|
+
|
179
|
+
def pop_current_record
|
180
|
+
@configs.pop
|
181
|
+
end
|
182
|
+
|
183
|
+
def current_record
|
184
|
+
@configs[-1]
|
185
|
+
end
|
186
|
+
|
187
|
+
def parsed_record
|
188
|
+
@configs[0]
|
189
|
+
end
|
150
190
|
|
151
|
-
|
152
|
-
|
191
|
+
def read_record(lines)
|
192
|
+
lines = lines.each_line.to_a if lines.kind_of? String
|
193
|
+
lines.each do |line|
|
194
|
+
setup_stack(line)
|
195
|
+
read(line)
|
196
|
+
end
|
197
|
+
remove_trailing_empty_lines_from_last_block!
|
198
|
+
parsed_record
|
199
|
+
end
|
200
|
+
|
201
|
+
def last_block_value
|
202
|
+
current_record[current_block_label]
|
203
|
+
end
|
204
|
+
|
205
|
+
def remove_trailing_empty_lines_from_last_block!
|
206
|
+
if current_reader.kind_of? BlockReader
|
207
|
+
last_block_value.sub!(/(#{$/})+\Z/, $/)
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
211
|
+
private
|
212
|
+
|
213
|
+
def setup_reader
|
214
|
+
Reader.setup_reader(self)
|
215
|
+
end
|
153
216
|
end
|
154
|
-
end
|
155
217
|
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
218
|
+
class Reader
|
219
|
+
def self.setup_reader(stack)
|
220
|
+
readers = {}
|
221
|
+
{
|
222
|
+
base: BaseReader,
|
223
|
+
key_value: KeyValueReader,
|
224
|
+
block: BlockReader,
|
225
|
+
iteration: IterationReader,
|
226
|
+
}.each do |k, v|
|
227
|
+
readers[k] = v.new(stack, readers)
|
228
|
+
end
|
229
|
+
stack.push readers[:base]
|
230
|
+
readers
|
231
|
+
end
|
232
|
+
|
233
|
+
def initialize(stack, readers)
|
234
|
+
@stack = stack
|
235
|
+
@readers = readers
|
236
|
+
end
|
237
|
+
|
238
|
+
def pop_stack
|
239
|
+
@stack.pop
|
240
|
+
end
|
241
|
+
|
242
|
+
def read(line)
|
243
|
+
end
|
244
|
+
|
245
|
+
private
|
246
|
+
|
247
|
+
def push_reader_if_match(line, readers)
|
248
|
+
readers.each do |reader|
|
249
|
+
return @stack.push(@readers[reader]) if READERS_RE[reader] === line
|
250
|
+
end
|
251
|
+
end
|
252
|
+
|
253
|
+
def setup_new_block(line, initial_value)
|
254
|
+
label = line.sub(BLOCK_HEAD, "").chomp
|
255
|
+
@stack.current_record[label] ||= initial_value
|
256
|
+
@stack.current_block_label = label
|
257
|
+
end
|
169
258
|
end
|
170
259
|
|
171
|
-
|
172
|
-
|
173
|
-
|
260
|
+
|
261
|
+
class BaseReader < Reader
|
262
|
+
def setup_stack(line)
|
263
|
+
push_reader_if_match(line, [:iteration, :block, :key_value])
|
264
|
+
end
|
174
265
|
end
|
175
266
|
|
176
|
-
|
177
|
-
|
267
|
+
class KeyValueReader < Reader
|
268
|
+
def setup_stack(line)
|
269
|
+
case line
|
270
|
+
when EMPTY_LINE, ITERATION_HEAD, BLOCK_HEAD
|
271
|
+
pop_stack
|
272
|
+
end
|
273
|
+
push_reader_if_match(line, [:iteration, :block])
|
274
|
+
end
|
275
|
+
|
276
|
+
def read(line)
|
277
|
+
key, value = line.split(SEPARATOR, 2)
|
278
|
+
@stack.current_record[key] = value.chomp
|
279
|
+
end
|
178
280
|
end
|
179
281
|
|
180
|
-
|
282
|
+
class BlockReader < Reader
|
283
|
+
def setup_stack(line)
|
284
|
+
case line
|
285
|
+
when ITERATION_HEAD, BLOCK_HEAD
|
286
|
+
@stack.remove_trailing_empty_lines_from_last_block!
|
287
|
+
pop_stack
|
288
|
+
end
|
289
|
+
push_reader_if_match(line, [:iteration, :block])
|
290
|
+
end
|
291
|
+
|
292
|
+
def read(line)
|
293
|
+
block_value = @stack.last_block_value
|
294
|
+
case line
|
295
|
+
when BLOCK_HEAD
|
296
|
+
setup_new_block(line, String.new)
|
297
|
+
when EMPTY_LINE
|
298
|
+
block_value << line unless block_value.empty?
|
299
|
+
else
|
300
|
+
block_value << line
|
301
|
+
end
|
302
|
+
end
|
181
303
|
end
|
182
304
|
|
183
|
-
|
305
|
+
class IterationReader < Reader
|
306
|
+
def setup_stack(line)
|
307
|
+
case line
|
308
|
+
when ITERATION_HEAD
|
309
|
+
@stack.pop_current_record
|
310
|
+
when BLOCK_HEAD
|
311
|
+
@stack.pop_current_record
|
312
|
+
pop_stack
|
313
|
+
@stack.push @readers[:block]
|
314
|
+
when SEPARATOR
|
315
|
+
@stack.pop_current_record
|
316
|
+
@stack.last_block_value.push @stack.push_new_record
|
317
|
+
@stack.push @readers[:key_value]
|
318
|
+
end
|
319
|
+
end
|
184
320
|
|
185
|
-
|
186
|
-
|
187
|
-
|
321
|
+
def read(line)
|
322
|
+
case line
|
323
|
+
when ITERATION_HEAD
|
324
|
+
setup_new_block(line, [])
|
325
|
+
@stack.push_new_record
|
326
|
+
end
|
188
327
|
end
|
189
328
|
end
|
190
329
|
|
191
|
-
def
|
192
|
-
|
193
|
-
@stack.current_record[label] ||= initial_value
|
194
|
-
@stack.current_block_label = label
|
330
|
+
def self.read_record(input)
|
331
|
+
ReaderState.new.read_record(input)
|
195
332
|
end
|
196
|
-
end
|
197
333
|
|
334
|
+
def self.dump(labels)
|
335
|
+
iteration_keys, kv_keys, block_keys = categorize_keys(labels)
|
198
336
|
|
199
|
-
|
200
|
-
|
201
|
-
|
337
|
+
key_value_part = format_key_value_pairs(kv_keys, labels)
|
338
|
+
iteration_part = format_iteration_block(iteration_keys, labels)
|
339
|
+
block_part = format_key_value_block(block_keys, labels)
|
340
|
+
|
341
|
+
[key_value_part, iteration_part, block_part].join($/).sub(/(#{$/}+)\Z/, $/)
|
202
342
|
end
|
203
|
-
end
|
204
343
|
|
205
|
-
|
206
|
-
|
207
|
-
case line
|
208
|
-
when EMPTY_LINE, ITERATION_HEAD, BLOCK_HEAD
|
209
|
-
pop_stack
|
210
|
-
end
|
211
|
-
push_reader_if_match(line, [:iteration, :block])
|
344
|
+
def self.format_key_value_pairs(key_names, labels={})
|
345
|
+
key_names.map {|key| "#{key}: #{labels[key]}#{$/}" }.join
|
212
346
|
end
|
213
347
|
|
214
|
-
def
|
215
|
-
|
216
|
-
|
348
|
+
def self.format_key_value_block(key_names, labels)
|
349
|
+
[].tap do |blocks|
|
350
|
+
key_names.each do |key|
|
351
|
+
blocks.push "///@#{key}#{$/*2}#{labels[key]}"
|
352
|
+
end
|
353
|
+
end.join($/)
|
217
354
|
end
|
218
|
-
end
|
219
355
|
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
356
|
+
def self.format_iteration_block(key_names, labels)
|
357
|
+
key_names.map do |iteration_label|
|
358
|
+
iteration_block = ["///@#{iteration_label}#{$/}"]
|
359
|
+
labels[iteration_label].each do |sub_record|
|
360
|
+
iteration_block.push format_key_value_pairs(sub_record.keys, sub_record)
|
361
|
+
end
|
362
|
+
iteration_block.join($/)
|
363
|
+
end.join($/)
|
228
364
|
end
|
229
365
|
|
230
|
-
def
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
else
|
238
|
-
block_value << line
|
366
|
+
def self.categorize_keys(labels)
|
367
|
+
iteration_part, rest = labels.partition do |e|
|
368
|
+
e[1].kind_of? Array
|
369
|
+
end.map {|e| e.map(&:first) }
|
370
|
+
|
371
|
+
block_part, key_value_part = rest.partition do |e|
|
372
|
+
LINE_END_RE =~ labels[e]
|
239
373
|
end
|
374
|
+
|
375
|
+
return iteration_part, key_value_part, block_part
|
240
376
|
end
|
377
|
+
|
378
|
+
private_class_method :format_key_value_pairs
|
379
|
+
private_class_method :format_key_value_block
|
380
|
+
private_class_method :format_iteration_block
|
381
|
+
private_class_method :categorize_keys
|
241
382
|
end
|
242
383
|
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
pop_stack
|
251
|
-
@stack.push @readers[:block]
|
252
|
-
when SEPARATOR
|
253
|
-
@stack.pop_current_record
|
254
|
-
@stack.last_block_value.push @stack.push_new_record
|
255
|
-
@stack.push @readers[:key_value]
|
256
|
-
end
|
257
|
-
end
|
384
|
+
FORMAT_NAME_TO_READER = {
|
385
|
+
yaml: YAMLReader,
|
386
|
+
json: JSONReader,
|
387
|
+
csv: CSVReader,
|
388
|
+
tsv: TSVReader,
|
389
|
+
default: DefaultFormReader,
|
390
|
+
}
|
258
391
|
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
@stack.push_new_record
|
264
|
-
end
|
265
|
-
end
|
392
|
+
FORMAT_NAME_TO_READER.default = DefaultFormReader
|
393
|
+
|
394
|
+
def self.dump(data_source, target_format=:default)
|
395
|
+
FORMAT_NAME_TO_READER[target_format].dump(data_source)
|
266
396
|
end
|
267
397
|
|
268
398
|
def self.read_record(input, source_format=:default)
|
269
399
|
case source_format
|
270
|
-
when :default
|
271
|
-
ReaderState.new.read_record(input)
|
272
|
-
when :yaml
|
273
|
-
YAMLReader.read_record(input)
|
274
|
-
when :json
|
275
|
-
JSONReader.read_record(input)
|
276
400
|
when :csv, :tsv, Hash
|
277
401
|
CSVReader.read_record(input, source_format)
|
402
|
+
else
|
403
|
+
FORMAT_NAME_TO_READER[source_format].read_record(input)
|
404
|
+
end
|
405
|
+
end
|
406
|
+
|
407
|
+
def self.parse_if_necessary(source)
|
408
|
+
if source.kind_of? String
|
409
|
+
RecordReader.read_record(source)
|
410
|
+
else
|
411
|
+
source
|
278
412
|
end
|
279
413
|
end
|
280
414
|
end
|