ad_hoc_template 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -14,7 +14,7 @@ module AdHocTemplate
14
14
  end
15
15
 
16
16
  def self.choose_parser(hiki_source)
17
- hiki_source[/(?:\r?\n|\r)/] ? BlockParser : InlineParser
17
+ hiki_source[LINE_END_RE] ? BlockParser : InlineParser
18
18
  end
19
19
 
20
20
  private_class_method :choose_parser
@@ -11,8 +11,8 @@ module AdHocTemplate
11
11
  YAML.load(yaml_data)
12
12
  end
13
13
 
14
- def self.to_yaml(config_data)
15
- data = RecordReader.read_record(config_data)
14
+ def self.dump(config_data)
15
+ data = RecordReader.parse_if_necessary(config_data)
16
16
  YAML.dump(data)
17
17
  end
18
18
  end
@@ -22,13 +22,20 @@ module AdHocTemplate
22
22
  JSON.parse(json_data)
23
23
  end
24
24
 
25
- def self.to_json(config_data)
26
- data = RecordReader.read_record(config_data)
27
- JSON.dump(data)
25
+ def self.dump(config_data)
26
+ data = RecordReader.parse_if_necessary(config_data)
27
+ JSON.pretty_generate(data)
28
28
  end
29
29
  end
30
30
 
31
31
  module CSVReader
32
+ COL_SEP = {
33
+ csv: CSV::DEFAULT_OPTIONS[:col_sep],
34
+ tsv: "\t"
35
+ }
36
+
37
+ class NotSupportedError < StandardError; end
38
+
32
39
  def self.read_record(csv_data, config={ csv: nil })
33
40
  label, sep = parse_config(config)
34
41
  header, *data = CSV.new(csv_data, col_sep: sep).to_a
@@ -42,6 +49,19 @@ module AdHocTemplate
42
49
  end
43
50
  end
44
51
 
52
+ def self.dump(config_data, col_sep=COL_SEP[:csv])
53
+ data = RecordReader.parse_if_necessary(config_data)
54
+ raise NotSupportedError unless csv_compatible_format?(data)
55
+
56
+ if kv_pairs = find_sub_records(data)
57
+ records = hashes_to_arrays(kv_pairs)
58
+ else
59
+ records = data.to_a.transpose
60
+ end
61
+
62
+ array_to_csv(records, col_sep)
63
+ end
64
+
45
65
  def self.convert_to_hash(header, row_array)
46
66
  {}.tap do |record|
47
67
  header.zip(row_array).each do |key, value|
@@ -60,221 +80,335 @@ module AdHocTemplate
60
80
  when Hash
61
81
  format, label = config.to_a[0]
62
82
  end
63
- field_sep = format == :tsv ? "\t" : CSV::DEFAULT_OPTIONS[:col_sep]
64
- return label, field_sep
83
+ col_sep = COL_SEP[format||:csv]
84
+ return label, col_sep
65
85
  end
66
86
 
67
- private_class_method :convert_to_hash, :parse_config
68
- end
69
-
70
- SEPARATOR = /:\s*/o
71
- BLOCK_HEAD = /\A\/\/\/@/o
72
- ITERATION_HEAD = /\A\/\/\/@#/o
73
- EMPTY_LINE = /\A(?:\r?\n|\r)\Z/o
74
- ITERATION_MARK = /\A#/o
75
- READERS_RE = {
76
- key_value: SEPARATOR,
77
- iteration: ITERATION_HEAD,
78
- block: BLOCK_HEAD,
79
- empty_line: EMPTY_LINE,
80
- }
81
-
82
- class ReaderState
83
- attr_accessor :current_block_label
84
-
85
- def initialize(config={}, stack=[])
86
- @stack = stack
87
- @configs = [config]
88
- setup_reader
87
+ def self.csv_compatible_format?(data)
88
+ iteration_blocks_count = data.values.select {|v| v.kind_of? Array }.size
89
+ iteration_blocks_count == 0 or (iteration_blocks_count == 1 && data.size == 1)
89
90
  end
90
91
 
91
- def push(reader)
92
- @stack.push reader
92
+ def self.hashes_to_arrays(data)
93
+ headers = data.max_by {|h| h.keys.size }.keys
94
+ records = data.map {|record| headers.map {|header| record[header] } }
95
+ records.unshift headers
93
96
  end
94
97
 
95
- def pop
96
- @stack.pop unless @stack.length == 1
98
+ def self.find_sub_records(data)
99
+ data.values.find {|v| v.kind_of? Array }
97
100
  end
98
101
 
99
- def setup_stack(line)
100
- @stack[-1].setup_stack(line)
101
- end
102
+ def self.array_to_csv(records, col_sep)
103
+ # I do not adopt "records.map {|rec| rec.to_csv }.join",
104
+ # because I'm not sure if it is sufficient for certain data or not.
105
+ # For example, a field value may contain carriage returns or line feeds,
106
+ # and in that case, improper handling of the end of record would be damaging.
102
107
 
103
- def current_reader
104
- @stack[-1]
108
+ CSV.generate('', col_sep: col_sep) do |csv|
109
+ records.each {|record| csv << record }
110
+ end
105
111
  end
106
112
 
107
- def read(line)
108
- @stack[-1].read(line)
109
- end
113
+ private_class_method :convert_to_hash, :parse_config
114
+ private_class_method :csv_compatible_format?, :hashes_to_arrays
115
+ private_class_method :find_sub_records, :array_to_csv
116
+ end
110
117
 
111
- def push_new_record
112
- new_record = {}
113
- @configs.push new_record
114
- new_record
115
- end
118
+ module TSVReader
119
+ COL_SEP = CSVReader::COL_SEP
116
120
 
117
- def pop_current_record
118
- @configs.pop
121
+ def self.read_record(tsv_data, config={ tsv: nil })
122
+ config = { tsv: config } if config.kind_of? String
123
+ CSVReader.read_record(tsv_data, config)
119
124
  end
120
125
 
121
- def current_record
122
- @configs[-1]
126
+ def self.dump(config_data, col_sep=COL_SEP[:tsv])
127
+ CSVReader.dump(config_data, col_sep)
123
128
  end
129
+ end
124
130
 
125
- def parsed_record
126
- @configs[0]
127
- end
131
+ module DefaultFormReader
132
+ SEPARATOR = /:\s*/o
133
+ BLOCK_HEAD = /\A\/\/\/@/o
134
+ ITERATION_HEAD = /\A\/\/\/@#/o
135
+ EMPTY_LINE = /\A#{LINE_END_STR}\Z/o
136
+ ITERATION_MARK = /\A#/o
137
+ READERS_RE = {
138
+ key_value: SEPARATOR,
139
+ iteration: ITERATION_HEAD,
140
+ block: BLOCK_HEAD,
141
+ empty_line: EMPTY_LINE,
142
+ }
143
+
144
+ class ReaderState
145
+ attr_accessor :current_block_label
146
+
147
+ def initialize(config={}, stack=[])
148
+ @stack = stack
149
+ @configs = [config]
150
+ setup_reader
151
+ end
128
152
 
129
- def read_record(lines)
130
- lines = lines.each_line.to_a if lines.kind_of? String
131
- lines.each do |line|
132
- setup_stack(line)
133
- read(line)
153
+ def push(reader)
154
+ @stack.push reader
134
155
  end
135
- remove_trailing_empty_lines_from_last_block!
136
- parsed_record
137
- end
138
156
 
139
- def last_block_value
140
- current_record[current_block_label]
141
- end
157
+ def pop
158
+ @stack.pop unless @stack.length == 1
159
+ end
142
160
 
143
- def remove_trailing_empty_lines_from_last_block!
144
- if current_reader.kind_of? BlockReader
145
- last_block_value.sub!(/(#{$/})+\Z/, $/)
161
+ def setup_stack(line)
162
+ @stack[-1].setup_stack(line)
163
+ end
164
+
165
+ def current_reader
166
+ @stack[-1]
146
167
  end
147
- end
148
168
 
149
- private
169
+ def read(line)
170
+ @stack[-1].read(line)
171
+ end
172
+
173
+ def push_new_record
174
+ new_record = {}
175
+ @configs.push new_record
176
+ new_record
177
+ end
178
+
179
+ def pop_current_record
180
+ @configs.pop
181
+ end
182
+
183
+ def current_record
184
+ @configs[-1]
185
+ end
186
+
187
+ def parsed_record
188
+ @configs[0]
189
+ end
150
190
 
151
- def setup_reader
152
- Reader.setup_reader(self)
191
+ def read_record(lines)
192
+ lines = lines.each_line.to_a if lines.kind_of? String
193
+ lines.each do |line|
194
+ setup_stack(line)
195
+ read(line)
196
+ end
197
+ remove_trailing_empty_lines_from_last_block!
198
+ parsed_record
199
+ end
200
+
201
+ def last_block_value
202
+ current_record[current_block_label]
203
+ end
204
+
205
+ def remove_trailing_empty_lines_from_last_block!
206
+ if current_reader.kind_of? BlockReader
207
+ last_block_value.sub!(/(#{$/})+\Z/, $/)
208
+ end
209
+ end
210
+
211
+ private
212
+
213
+ def setup_reader
214
+ Reader.setup_reader(self)
215
+ end
153
216
  end
154
- end
155
217
 
156
- class Reader
157
- def self.setup_reader(stack)
158
- readers = {}
159
- {
160
- base: BaseReader,
161
- key_value: KeyValueReader,
162
- block: BlockReader,
163
- iteration: IterationReader,
164
- }.each do |k, v|
165
- readers[k] = v.new(stack, readers)
166
- end
167
- stack.push readers[:base]
168
- readers
218
+ class Reader
219
+ def self.setup_reader(stack)
220
+ readers = {}
221
+ {
222
+ base: BaseReader,
223
+ key_value: KeyValueReader,
224
+ block: BlockReader,
225
+ iteration: IterationReader,
226
+ }.each do |k, v|
227
+ readers[k] = v.new(stack, readers)
228
+ end
229
+ stack.push readers[:base]
230
+ readers
231
+ end
232
+
233
+ def initialize(stack, readers)
234
+ @stack = stack
235
+ @readers = readers
236
+ end
237
+
238
+ def pop_stack
239
+ @stack.pop
240
+ end
241
+
242
+ def read(line)
243
+ end
244
+
245
+ private
246
+
247
+ def push_reader_if_match(line, readers)
248
+ readers.each do |reader|
249
+ return @stack.push(@readers[reader]) if READERS_RE[reader] === line
250
+ end
251
+ end
252
+
253
+ def setup_new_block(line, initial_value)
254
+ label = line.sub(BLOCK_HEAD, "").chomp
255
+ @stack.current_record[label] ||= initial_value
256
+ @stack.current_block_label = label
257
+ end
169
258
  end
170
259
 
171
- def initialize(stack, readers)
172
- @stack = stack
173
- @readers = readers
260
+
261
+ class BaseReader < Reader
262
+ def setup_stack(line)
263
+ push_reader_if_match(line, [:iteration, :block, :key_value])
264
+ end
174
265
  end
175
266
 
176
- def pop_stack
177
- @stack.pop
267
+ class KeyValueReader < Reader
268
+ def setup_stack(line)
269
+ case line
270
+ when EMPTY_LINE, ITERATION_HEAD, BLOCK_HEAD
271
+ pop_stack
272
+ end
273
+ push_reader_if_match(line, [:iteration, :block])
274
+ end
275
+
276
+ def read(line)
277
+ key, value = line.split(SEPARATOR, 2)
278
+ @stack.current_record[key] = value.chomp
279
+ end
178
280
  end
179
281
 
180
- def read(line)
282
+ class BlockReader < Reader
283
+ def setup_stack(line)
284
+ case line
285
+ when ITERATION_HEAD, BLOCK_HEAD
286
+ @stack.remove_trailing_empty_lines_from_last_block!
287
+ pop_stack
288
+ end
289
+ push_reader_if_match(line, [:iteration, :block])
290
+ end
291
+
292
+ def read(line)
293
+ block_value = @stack.last_block_value
294
+ case line
295
+ when BLOCK_HEAD
296
+ setup_new_block(line, String.new)
297
+ when EMPTY_LINE
298
+ block_value << line unless block_value.empty?
299
+ else
300
+ block_value << line
301
+ end
302
+ end
181
303
  end
182
304
 
183
- private
305
+ class IterationReader < Reader
306
+ def setup_stack(line)
307
+ case line
308
+ when ITERATION_HEAD
309
+ @stack.pop_current_record
310
+ when BLOCK_HEAD
311
+ @stack.pop_current_record
312
+ pop_stack
313
+ @stack.push @readers[:block]
314
+ when SEPARATOR
315
+ @stack.pop_current_record
316
+ @stack.last_block_value.push @stack.push_new_record
317
+ @stack.push @readers[:key_value]
318
+ end
319
+ end
184
320
 
185
- def push_reader_if_match(line, readers)
186
- readers.each do |reader|
187
- return @stack.push(@readers[reader]) if READERS_RE[reader] === line
321
+ def read(line)
322
+ case line
323
+ when ITERATION_HEAD
324
+ setup_new_block(line, [])
325
+ @stack.push_new_record
326
+ end
188
327
  end
189
328
  end
190
329
 
191
- def setup_new_block(line, initial_value)
192
- label = line.sub(BLOCK_HEAD, "").chomp
193
- @stack.current_record[label] ||= initial_value
194
- @stack.current_block_label = label
330
+ def self.read_record(input)
331
+ ReaderState.new.read_record(input)
195
332
  end
196
- end
197
333
 
334
+ def self.dump(labels)
335
+ iteration_keys, kv_keys, block_keys = categorize_keys(labels)
198
336
 
199
- class BaseReader < Reader
200
- def setup_stack(line)
201
- push_reader_if_match(line, [:iteration, :block, :key_value])
337
+ key_value_part = format_key_value_pairs(kv_keys, labels)
338
+ iteration_part = format_iteration_block(iteration_keys, labels)
339
+ block_part = format_key_value_block(block_keys, labels)
340
+
341
+ [key_value_part, iteration_part, block_part].join($/).sub(/(#{$/}+)\Z/, $/)
202
342
  end
203
- end
204
343
 
205
- class KeyValueReader < Reader
206
- def setup_stack(line)
207
- case line
208
- when EMPTY_LINE, ITERATION_HEAD, BLOCK_HEAD
209
- pop_stack
210
- end
211
- push_reader_if_match(line, [:iteration, :block])
344
+ def self.format_key_value_pairs(key_names, labels={})
345
+ key_names.map {|key| "#{key}: #{labels[key]}#{$/}" }.join
212
346
  end
213
347
 
214
- def read(line)
215
- key, value = line.split(SEPARATOR, 2)
216
- @stack.current_record[key] = value.chomp
348
+ def self.format_key_value_block(key_names, labels)
349
+ [].tap do |blocks|
350
+ key_names.each do |key|
351
+ blocks.push "///@#{key}#{$/*2}#{labels[key]}"
352
+ end
353
+ end.join($/)
217
354
  end
218
- end
219
355
 
220
- class BlockReader < Reader
221
- def setup_stack(line)
222
- case line
223
- when ITERATION_HEAD, BLOCK_HEAD
224
- @stack.remove_trailing_empty_lines_from_last_block!
225
- pop_stack
226
- end
227
- push_reader_if_match(line, [:iteration, :block])
356
+ def self.format_iteration_block(key_names, labels)
357
+ key_names.map do |iteration_label|
358
+ iteration_block = ["///@#{iteration_label}#{$/}"]
359
+ labels[iteration_label].each do |sub_record|
360
+ iteration_block.push format_key_value_pairs(sub_record.keys, sub_record)
361
+ end
362
+ iteration_block.join($/)
363
+ end.join($/)
228
364
  end
229
365
 
230
- def read(line)
231
- block_value = @stack.last_block_value
232
- case line
233
- when BLOCK_HEAD
234
- setup_new_block(line, String.new)
235
- when EMPTY_LINE
236
- block_value << line unless block_value.empty?
237
- else
238
- block_value << line
366
+ def self.categorize_keys(labels)
367
+ iteration_part, rest = labels.partition do |e|
368
+ e[1].kind_of? Array
369
+ end.map {|e| e.map(&:first) }
370
+
371
+ block_part, key_value_part = rest.partition do |e|
372
+ LINE_END_RE =~ labels[e]
239
373
  end
374
+
375
+ return iteration_part, key_value_part, block_part
240
376
  end
377
+
378
+ private_class_method :format_key_value_pairs
379
+ private_class_method :format_key_value_block
380
+ private_class_method :format_iteration_block
381
+ private_class_method :categorize_keys
241
382
  end
242
383
 
243
- class IterationReader < Reader
244
- def setup_stack(line)
245
- case line
246
- when ITERATION_HEAD
247
- @stack.pop_current_record
248
- when BLOCK_HEAD
249
- @stack.pop_current_record
250
- pop_stack
251
- @stack.push @readers[:block]
252
- when SEPARATOR
253
- @stack.pop_current_record
254
- @stack.last_block_value.push @stack.push_new_record
255
- @stack.push @readers[:key_value]
256
- end
257
- end
384
+ FORMAT_NAME_TO_READER = {
385
+ yaml: YAMLReader,
386
+ json: JSONReader,
387
+ csv: CSVReader,
388
+ tsv: TSVReader,
389
+ default: DefaultFormReader,
390
+ }
258
391
 
259
- def read(line)
260
- case line
261
- when ITERATION_HEAD
262
- setup_new_block(line, [])
263
- @stack.push_new_record
264
- end
265
- end
392
+ FORMAT_NAME_TO_READER.default = DefaultFormReader
393
+
394
+ def self.dump(data_source, target_format=:default)
395
+ FORMAT_NAME_TO_READER[target_format].dump(data_source)
266
396
  end
267
397
 
268
398
  def self.read_record(input, source_format=:default)
269
399
  case source_format
270
- when :default
271
- ReaderState.new.read_record(input)
272
- when :yaml
273
- YAMLReader.read_record(input)
274
- when :json
275
- JSONReader.read_record(input)
276
400
  when :csv, :tsv, Hash
277
401
  CSVReader.read_record(input, source_format)
402
+ else
403
+ FORMAT_NAME_TO_READER[source_format].read_record(input)
404
+ end
405
+ end
406
+
407
+ def self.parse_if_necessary(source)
408
+ if source.kind_of? String
409
+ RecordReader.read_record(source)
410
+ else
411
+ source
278
412
  end
279
413
  end
280
414
  end