ad_hoc_template 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/ad_hoc_template.rb +24 -3
- data/lib/ad_hoc_template/command_line_interface.rb +27 -11
- data/lib/ad_hoc_template/entry_format_generator.rb +49 -0
- data/lib/ad_hoc_template/parser.rb +127 -58
- data/lib/ad_hoc_template/pseudohiki_formatter.rb +1 -1
- data/lib/ad_hoc_template/record_reader.rb +294 -160
- data/lib/ad_hoc_template/version.rb +1 -1
- data/spec/ad_hoc_template_spec.rb +151 -5
- data/spec/command_line_interface_spec.rb +107 -7
- data/spec/entry_format_generator_spec.rb +163 -0
- data/spec/parser_spec.rb +114 -18
- data/spec/pseudohiki_formatter_spec.rb +2 -2
- data/spec/record_reader_spec.rb +253 -9
- metadata +6 -3
@@ -11,8 +11,8 @@ module AdHocTemplate
|
|
11
11
|
YAML.load(yaml_data)
|
12
12
|
end
|
13
13
|
|
14
|
-
def self.
|
15
|
-
data = RecordReader.
|
14
|
+
def self.dump(config_data)
|
15
|
+
data = RecordReader.parse_if_necessary(config_data)
|
16
16
|
YAML.dump(data)
|
17
17
|
end
|
18
18
|
end
|
@@ -22,13 +22,20 @@ module AdHocTemplate
|
|
22
22
|
JSON.parse(json_data)
|
23
23
|
end
|
24
24
|
|
25
|
-
def self.
|
26
|
-
data = RecordReader.
|
27
|
-
JSON.
|
25
|
+
def self.dump(config_data)
|
26
|
+
data = RecordReader.parse_if_necessary(config_data)
|
27
|
+
JSON.pretty_generate(data)
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
31
31
|
module CSVReader
|
32
|
+
COL_SEP = {
|
33
|
+
csv: CSV::DEFAULT_OPTIONS[:col_sep],
|
34
|
+
tsv: "\t"
|
35
|
+
}
|
36
|
+
|
37
|
+
class NotSupportedError < StandardError; end
|
38
|
+
|
32
39
|
def self.read_record(csv_data, config={ csv: nil })
|
33
40
|
label, sep = parse_config(config)
|
34
41
|
header, *data = CSV.new(csv_data, col_sep: sep).to_a
|
@@ -42,6 +49,19 @@ module AdHocTemplate
|
|
42
49
|
end
|
43
50
|
end
|
44
51
|
|
52
|
+
def self.dump(config_data, col_sep=COL_SEP[:csv])
|
53
|
+
data = RecordReader.parse_if_necessary(config_data)
|
54
|
+
raise NotSupportedError unless csv_compatible_format?(data)
|
55
|
+
|
56
|
+
if kv_pairs = find_sub_records(data)
|
57
|
+
records = hashes_to_arrays(kv_pairs)
|
58
|
+
else
|
59
|
+
records = data.to_a.transpose
|
60
|
+
end
|
61
|
+
|
62
|
+
array_to_csv(records, col_sep)
|
63
|
+
end
|
64
|
+
|
45
65
|
def self.convert_to_hash(header, row_array)
|
46
66
|
{}.tap do |record|
|
47
67
|
header.zip(row_array).each do |key, value|
|
@@ -60,221 +80,335 @@ module AdHocTemplate
|
|
60
80
|
when Hash
|
61
81
|
format, label = config.to_a[0]
|
62
82
|
end
|
63
|
-
|
64
|
-
return label,
|
83
|
+
col_sep = COL_SEP[format||:csv]
|
84
|
+
return label, col_sep
|
65
85
|
end
|
66
86
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
SEPARATOR = /:\s*/o
|
71
|
-
BLOCK_HEAD = /\A\/\/\/@/o
|
72
|
-
ITERATION_HEAD = /\A\/\/\/@#/o
|
73
|
-
EMPTY_LINE = /\A(?:\r?\n|\r)\Z/o
|
74
|
-
ITERATION_MARK = /\A#/o
|
75
|
-
READERS_RE = {
|
76
|
-
key_value: SEPARATOR,
|
77
|
-
iteration: ITERATION_HEAD,
|
78
|
-
block: BLOCK_HEAD,
|
79
|
-
empty_line: EMPTY_LINE,
|
80
|
-
}
|
81
|
-
|
82
|
-
class ReaderState
|
83
|
-
attr_accessor :current_block_label
|
84
|
-
|
85
|
-
def initialize(config={}, stack=[])
|
86
|
-
@stack = stack
|
87
|
-
@configs = [config]
|
88
|
-
setup_reader
|
87
|
+
def self.csv_compatible_format?(data)
|
88
|
+
iteration_blocks_count = data.values.select {|v| v.kind_of? Array }.size
|
89
|
+
iteration_blocks_count == 0 or (iteration_blocks_count == 1 && data.size == 1)
|
89
90
|
end
|
90
91
|
|
91
|
-
def
|
92
|
-
|
92
|
+
def self.hashes_to_arrays(data)
|
93
|
+
headers = data.max_by {|h| h.keys.size }.keys
|
94
|
+
records = data.map {|record| headers.map {|header| record[header] } }
|
95
|
+
records.unshift headers
|
93
96
|
end
|
94
97
|
|
95
|
-
def
|
96
|
-
|
98
|
+
def self.find_sub_records(data)
|
99
|
+
data.values.find {|v| v.kind_of? Array }
|
97
100
|
end
|
98
101
|
|
99
|
-
def
|
100
|
-
|
101
|
-
|
102
|
+
def self.array_to_csv(records, col_sep)
|
103
|
+
# I do not adopt "records.map {|rec| rec.to_csv }.join",
|
104
|
+
# because I'm not sure if it is sufficient for certain data or not.
|
105
|
+
# For example, a field value may contain carriage returns or line feeds,
|
106
|
+
# and in that case, improper handling of the end of record would be damaging.
|
102
107
|
|
103
|
-
|
104
|
-
|
108
|
+
CSV.generate('', col_sep: col_sep) do |csv|
|
109
|
+
records.each {|record| csv << record }
|
110
|
+
end
|
105
111
|
end
|
106
112
|
|
107
|
-
|
108
|
-
|
109
|
-
|
113
|
+
private_class_method :convert_to_hash, :parse_config
|
114
|
+
private_class_method :csv_compatible_format?, :hashes_to_arrays
|
115
|
+
private_class_method :find_sub_records, :array_to_csv
|
116
|
+
end
|
110
117
|
|
111
|
-
|
112
|
-
|
113
|
-
@configs.push new_record
|
114
|
-
new_record
|
115
|
-
end
|
118
|
+
module TSVReader
|
119
|
+
COL_SEP = CSVReader::COL_SEP
|
116
120
|
|
117
|
-
def
|
118
|
-
|
121
|
+
def self.read_record(tsv_data, config={ tsv: nil })
|
122
|
+
config = { tsv: config } if config.kind_of? String
|
123
|
+
CSVReader.read_record(tsv_data, config)
|
119
124
|
end
|
120
125
|
|
121
|
-
def
|
122
|
-
|
126
|
+
def self.dump(config_data, col_sep=COL_SEP[:tsv])
|
127
|
+
CSVReader.dump(config_data, col_sep)
|
123
128
|
end
|
129
|
+
end
|
124
130
|
|
125
|
-
|
126
|
-
|
127
|
-
|
131
|
+
module DefaultFormReader
|
132
|
+
SEPARATOR = /:\s*/o
|
133
|
+
BLOCK_HEAD = /\A\/\/\/@/o
|
134
|
+
ITERATION_HEAD = /\A\/\/\/@#/o
|
135
|
+
EMPTY_LINE = /\A#{LINE_END_STR}\Z/o
|
136
|
+
ITERATION_MARK = /\A#/o
|
137
|
+
READERS_RE = {
|
138
|
+
key_value: SEPARATOR,
|
139
|
+
iteration: ITERATION_HEAD,
|
140
|
+
block: BLOCK_HEAD,
|
141
|
+
empty_line: EMPTY_LINE,
|
142
|
+
}
|
143
|
+
|
144
|
+
class ReaderState
|
145
|
+
attr_accessor :current_block_label
|
146
|
+
|
147
|
+
def initialize(config={}, stack=[])
|
148
|
+
@stack = stack
|
149
|
+
@configs = [config]
|
150
|
+
setup_reader
|
151
|
+
end
|
128
152
|
|
129
|
-
|
130
|
-
|
131
|
-
lines.each do |line|
|
132
|
-
setup_stack(line)
|
133
|
-
read(line)
|
153
|
+
def push(reader)
|
154
|
+
@stack.push reader
|
134
155
|
end
|
135
|
-
remove_trailing_empty_lines_from_last_block!
|
136
|
-
parsed_record
|
137
|
-
end
|
138
156
|
|
139
|
-
|
140
|
-
|
141
|
-
|
157
|
+
def pop
|
158
|
+
@stack.pop unless @stack.length == 1
|
159
|
+
end
|
142
160
|
|
143
|
-
|
144
|
-
|
145
|
-
|
161
|
+
def setup_stack(line)
|
162
|
+
@stack[-1].setup_stack(line)
|
163
|
+
end
|
164
|
+
|
165
|
+
def current_reader
|
166
|
+
@stack[-1]
|
146
167
|
end
|
147
|
-
end
|
148
168
|
|
149
|
-
|
169
|
+
def read(line)
|
170
|
+
@stack[-1].read(line)
|
171
|
+
end
|
172
|
+
|
173
|
+
def push_new_record
|
174
|
+
new_record = {}
|
175
|
+
@configs.push new_record
|
176
|
+
new_record
|
177
|
+
end
|
178
|
+
|
179
|
+
def pop_current_record
|
180
|
+
@configs.pop
|
181
|
+
end
|
182
|
+
|
183
|
+
def current_record
|
184
|
+
@configs[-1]
|
185
|
+
end
|
186
|
+
|
187
|
+
def parsed_record
|
188
|
+
@configs[0]
|
189
|
+
end
|
150
190
|
|
151
|
-
|
152
|
-
|
191
|
+
def read_record(lines)
|
192
|
+
lines = lines.each_line.to_a if lines.kind_of? String
|
193
|
+
lines.each do |line|
|
194
|
+
setup_stack(line)
|
195
|
+
read(line)
|
196
|
+
end
|
197
|
+
remove_trailing_empty_lines_from_last_block!
|
198
|
+
parsed_record
|
199
|
+
end
|
200
|
+
|
201
|
+
def last_block_value
|
202
|
+
current_record[current_block_label]
|
203
|
+
end
|
204
|
+
|
205
|
+
def remove_trailing_empty_lines_from_last_block!
|
206
|
+
if current_reader.kind_of? BlockReader
|
207
|
+
last_block_value.sub!(/(#{$/})+\Z/, $/)
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
211
|
+
private
|
212
|
+
|
213
|
+
def setup_reader
|
214
|
+
Reader.setup_reader(self)
|
215
|
+
end
|
153
216
|
end
|
154
|
-
end
|
155
217
|
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
218
|
+
class Reader
|
219
|
+
def self.setup_reader(stack)
|
220
|
+
readers = {}
|
221
|
+
{
|
222
|
+
base: BaseReader,
|
223
|
+
key_value: KeyValueReader,
|
224
|
+
block: BlockReader,
|
225
|
+
iteration: IterationReader,
|
226
|
+
}.each do |k, v|
|
227
|
+
readers[k] = v.new(stack, readers)
|
228
|
+
end
|
229
|
+
stack.push readers[:base]
|
230
|
+
readers
|
231
|
+
end
|
232
|
+
|
233
|
+
def initialize(stack, readers)
|
234
|
+
@stack = stack
|
235
|
+
@readers = readers
|
236
|
+
end
|
237
|
+
|
238
|
+
def pop_stack
|
239
|
+
@stack.pop
|
240
|
+
end
|
241
|
+
|
242
|
+
def read(line)
|
243
|
+
end
|
244
|
+
|
245
|
+
private
|
246
|
+
|
247
|
+
def push_reader_if_match(line, readers)
|
248
|
+
readers.each do |reader|
|
249
|
+
return @stack.push(@readers[reader]) if READERS_RE[reader] === line
|
250
|
+
end
|
251
|
+
end
|
252
|
+
|
253
|
+
def setup_new_block(line, initial_value)
|
254
|
+
label = line.sub(BLOCK_HEAD, "").chomp
|
255
|
+
@stack.current_record[label] ||= initial_value
|
256
|
+
@stack.current_block_label = label
|
257
|
+
end
|
169
258
|
end
|
170
259
|
|
171
|
-
|
172
|
-
|
173
|
-
|
260
|
+
|
261
|
+
class BaseReader < Reader
|
262
|
+
def setup_stack(line)
|
263
|
+
push_reader_if_match(line, [:iteration, :block, :key_value])
|
264
|
+
end
|
174
265
|
end
|
175
266
|
|
176
|
-
|
177
|
-
|
267
|
+
class KeyValueReader < Reader
|
268
|
+
def setup_stack(line)
|
269
|
+
case line
|
270
|
+
when EMPTY_LINE, ITERATION_HEAD, BLOCK_HEAD
|
271
|
+
pop_stack
|
272
|
+
end
|
273
|
+
push_reader_if_match(line, [:iteration, :block])
|
274
|
+
end
|
275
|
+
|
276
|
+
def read(line)
|
277
|
+
key, value = line.split(SEPARATOR, 2)
|
278
|
+
@stack.current_record[key] = value.chomp
|
279
|
+
end
|
178
280
|
end
|
179
281
|
|
180
|
-
|
282
|
+
class BlockReader < Reader
|
283
|
+
def setup_stack(line)
|
284
|
+
case line
|
285
|
+
when ITERATION_HEAD, BLOCK_HEAD
|
286
|
+
@stack.remove_trailing_empty_lines_from_last_block!
|
287
|
+
pop_stack
|
288
|
+
end
|
289
|
+
push_reader_if_match(line, [:iteration, :block])
|
290
|
+
end
|
291
|
+
|
292
|
+
def read(line)
|
293
|
+
block_value = @stack.last_block_value
|
294
|
+
case line
|
295
|
+
when BLOCK_HEAD
|
296
|
+
setup_new_block(line, String.new)
|
297
|
+
when EMPTY_LINE
|
298
|
+
block_value << line unless block_value.empty?
|
299
|
+
else
|
300
|
+
block_value << line
|
301
|
+
end
|
302
|
+
end
|
181
303
|
end
|
182
304
|
|
183
|
-
|
305
|
+
class IterationReader < Reader
|
306
|
+
def setup_stack(line)
|
307
|
+
case line
|
308
|
+
when ITERATION_HEAD
|
309
|
+
@stack.pop_current_record
|
310
|
+
when BLOCK_HEAD
|
311
|
+
@stack.pop_current_record
|
312
|
+
pop_stack
|
313
|
+
@stack.push @readers[:block]
|
314
|
+
when SEPARATOR
|
315
|
+
@stack.pop_current_record
|
316
|
+
@stack.last_block_value.push @stack.push_new_record
|
317
|
+
@stack.push @readers[:key_value]
|
318
|
+
end
|
319
|
+
end
|
184
320
|
|
185
|
-
|
186
|
-
|
187
|
-
|
321
|
+
def read(line)
|
322
|
+
case line
|
323
|
+
when ITERATION_HEAD
|
324
|
+
setup_new_block(line, [])
|
325
|
+
@stack.push_new_record
|
326
|
+
end
|
188
327
|
end
|
189
328
|
end
|
190
329
|
|
191
|
-
def
|
192
|
-
|
193
|
-
@stack.current_record[label] ||= initial_value
|
194
|
-
@stack.current_block_label = label
|
330
|
+
def self.read_record(input)
|
331
|
+
ReaderState.new.read_record(input)
|
195
332
|
end
|
196
|
-
end
|
197
333
|
|
334
|
+
def self.dump(labels)
|
335
|
+
iteration_keys, kv_keys, block_keys = categorize_keys(labels)
|
198
336
|
|
199
|
-
|
200
|
-
|
201
|
-
|
337
|
+
key_value_part = format_key_value_pairs(kv_keys, labels)
|
338
|
+
iteration_part = format_iteration_block(iteration_keys, labels)
|
339
|
+
block_part = format_key_value_block(block_keys, labels)
|
340
|
+
|
341
|
+
[key_value_part, iteration_part, block_part].join($/).sub(/(#{$/}+)\Z/, $/)
|
202
342
|
end
|
203
|
-
end
|
204
343
|
|
205
|
-
|
206
|
-
|
207
|
-
case line
|
208
|
-
when EMPTY_LINE, ITERATION_HEAD, BLOCK_HEAD
|
209
|
-
pop_stack
|
210
|
-
end
|
211
|
-
push_reader_if_match(line, [:iteration, :block])
|
344
|
+
def self.format_key_value_pairs(key_names, labels={})
|
345
|
+
key_names.map {|key| "#{key}: #{labels[key]}#{$/}" }.join
|
212
346
|
end
|
213
347
|
|
214
|
-
def
|
215
|
-
|
216
|
-
|
348
|
+
def self.format_key_value_block(key_names, labels)
|
349
|
+
[].tap do |blocks|
|
350
|
+
key_names.each do |key|
|
351
|
+
blocks.push "///@#{key}#{$/*2}#{labels[key]}"
|
352
|
+
end
|
353
|
+
end.join($/)
|
217
354
|
end
|
218
|
-
end
|
219
355
|
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
356
|
+
def self.format_iteration_block(key_names, labels)
|
357
|
+
key_names.map do |iteration_label|
|
358
|
+
iteration_block = ["///@#{iteration_label}#{$/}"]
|
359
|
+
labels[iteration_label].each do |sub_record|
|
360
|
+
iteration_block.push format_key_value_pairs(sub_record.keys, sub_record)
|
361
|
+
end
|
362
|
+
iteration_block.join($/)
|
363
|
+
end.join($/)
|
228
364
|
end
|
229
365
|
|
230
|
-
def
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
else
|
238
|
-
block_value << line
|
366
|
+
def self.categorize_keys(labels)
|
367
|
+
iteration_part, rest = labels.partition do |e|
|
368
|
+
e[1].kind_of? Array
|
369
|
+
end.map {|e| e.map(&:first) }
|
370
|
+
|
371
|
+
block_part, key_value_part = rest.partition do |e|
|
372
|
+
LINE_END_RE =~ labels[e]
|
239
373
|
end
|
374
|
+
|
375
|
+
return iteration_part, key_value_part, block_part
|
240
376
|
end
|
377
|
+
|
378
|
+
private_class_method :format_key_value_pairs
|
379
|
+
private_class_method :format_key_value_block
|
380
|
+
private_class_method :format_iteration_block
|
381
|
+
private_class_method :categorize_keys
|
241
382
|
end
|
242
383
|
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
pop_stack
|
251
|
-
@stack.push @readers[:block]
|
252
|
-
when SEPARATOR
|
253
|
-
@stack.pop_current_record
|
254
|
-
@stack.last_block_value.push @stack.push_new_record
|
255
|
-
@stack.push @readers[:key_value]
|
256
|
-
end
|
257
|
-
end
|
384
|
+
FORMAT_NAME_TO_READER = {
|
385
|
+
yaml: YAMLReader,
|
386
|
+
json: JSONReader,
|
387
|
+
csv: CSVReader,
|
388
|
+
tsv: TSVReader,
|
389
|
+
default: DefaultFormReader,
|
390
|
+
}
|
258
391
|
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
@stack.push_new_record
|
264
|
-
end
|
265
|
-
end
|
392
|
+
FORMAT_NAME_TO_READER.default = DefaultFormReader
|
393
|
+
|
394
|
+
def self.dump(data_source, target_format=:default)
|
395
|
+
FORMAT_NAME_TO_READER[target_format].dump(data_source)
|
266
396
|
end
|
267
397
|
|
268
398
|
def self.read_record(input, source_format=:default)
|
269
399
|
case source_format
|
270
|
-
when :default
|
271
|
-
ReaderState.new.read_record(input)
|
272
|
-
when :yaml
|
273
|
-
YAMLReader.read_record(input)
|
274
|
-
when :json
|
275
|
-
JSONReader.read_record(input)
|
276
400
|
when :csv, :tsv, Hash
|
277
401
|
CSVReader.read_record(input, source_format)
|
402
|
+
else
|
403
|
+
FORMAT_NAME_TO_READER[source_format].read_record(input)
|
404
|
+
end
|
405
|
+
end
|
406
|
+
|
407
|
+
def self.parse_if_necessary(source)
|
408
|
+
if source.kind_of? String
|
409
|
+
RecordReader.read_record(source)
|
410
|
+
else
|
411
|
+
source
|
278
412
|
end
|
279
413
|
end
|
280
414
|
end
|