data_spork 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/lib/data_spork/importer.rb +371 -0
- data/lib/data_spork/version.rb +3 -0
- data/lib/data_spork.rb +13 -0
- metadata +117 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
ZDBlMDM4OTU4ZDU2N2JjNDdiN2RkY2VjNDE3NDUyYmNkMzU5OTM3Nw==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
OTRjZDVhNTRlMzkzZjJhMjVlOWUwZTk5NzRlZTNhZWMyMzQxZThmYg==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
MDIyYTg1ZTkyNDg4MDA5OTJhM2NjOTViNjk3NWQwN2I5YzYxYTM0ZTRhMzcw
|
10
|
+
YWNhZTEyNjk0MWUzMGE0ZjQyY2U5NjRkN2Q1N2IwZTY4MWY1MTFiNmRlNWY3
|
11
|
+
ZDMwMWQ0Y2QwNDU5NGNhYzA0MGNmZDAyMTNlNmUzMzA0OGQ5ODA=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
Zjc1MjcxNjc1MzhjZDhiNDI5ZGE0ZDJjNTQyYzRjNjkzZmY2YzU3Zjk3MDlk
|
14
|
+
ODI5ZTc2YmU4MzcxOTUyNjU4Y2IxMGM1MDA1YmYzZWExYmU4Njc3OTMwMDlk
|
15
|
+
YTU4NWI0YTgxNDE1N2E4YWJmZDdiNGYwZGU1NGYyMjAxYjZmZGI=
|
@@ -0,0 +1,371 @@
|
|
1
|
+
require 'pathname'
require 'google_drive'
|
2
|
+
|
3
|
+
module DataSpork
  # Reads tabular rows (CSV, Excel, or any other source a subclass supplies),
  # writes them as an XML document via +puts+, and captures the same rows into
  # an in-memory menu board hash.
  #
  # The first row handed to #append is treated as the header row: each cell is
  # translated through #map into a tag name, and only columns whose tag appears
  # in #xml_tags are written and captured.
  #
  # This class references the constants VERBOSE, VERBOSE_IO_OPTIONS,
  # ENCODE_VALUES, SANITIZE_VALUES and DEFAULT_INPUT_NAME, none of which are
  # defined in this file; they must be resolvable from this class at run time.
  class Importer
    attr_reader :row_num, :row, :headers, :input_type
    attr_reader :xml_tags, :root_tag, :row_tag, :options
    # The captured groups, keyed by group name (populated by #begin_menu_group).
    attr_reader :menu_board
    attr_accessor :col_num, :setup_state, :blank_row
    attr_accessor :effective_date
    # State used while capturing rows; previously referenced without being declared.
    attr_accessor :menu_group_name, :menu_group, :menu_choice

    # Entry point: builds an importer and runs the whole conversion.
    #
    # @param :input_type symbol naming the kind of input to read (:csv, :xlsx or :xls)
    # @param :options hash with options to control the behavior of the conversion
    def self.convert(input_type, options = nil)
      new(input_type, options).convert
    end

    # Constructor
    #
    # @param :input_type symbol naming the kind of input to read
    # @param :options hash of options, see #init_options
    def initialize(input_type, options = nil)
      @input_type = input_type
      init_options options
      @xml_tags = %w(menu_group_name name calories ingredients is_active trademarked is_good_cold )
      @root_tag = 'MenuBoard'
      @row_tag = 'menu_choice'
      @location_filters = {
          default: nil
      }
      @started_xml = false
      @menu_board = {}
    end

    # Merges the caller's options over the defaults. When an :output_file is
    # requested without an :output_path, the output path falls back to :source_path.
    #
    # @param :options hash of options, or nil for the defaults
    def init_options(options)
      @options = { source_path: '.' }.merge(options || {})
      @options[:output_path] = @options[:source_path] if @options[:output_path].nil? && @options[:output_file]
    end

    # Pattern that identifies the cell labelling the effective date.
    def effective_date_pattern
      /^[Ee]ffective [Dd]ate+/
    end

    # Answer the hash translating input column titles to output tag names.
    def map
      {} # subclass should override
    end

    # Answer the input file name: the default base name plus the input type as extension.
    # NOTE(review): DEFAULT_INPUT_NAME is not defined in this file - confirm where it is declared.
    def source_name
      "#{DEFAULT_INPUT_NAME}.#{input_type}"
    end

    # Answer the full path of the input file as a string.
    def input_pathname
      Pathname(options[:source_path]).join(source_name).to_s
    end

    # Answer the directory (a Pathname) that receives the output file.
    def output_pathname
      Pathname(options[:output_path]).join('output')
    end

    # Answer the output file Pathname, with #file_modifier inserted before the extension.
    def output_filename
      target = output_pathname.join(options[:output_file])
      target.sub_ext "#{file_modifier}#{target.extname}"
    end

    # Answer the text inserted before the output file extension; empty by default.
    def file_modifier
      ''
    end

    # Records that the XML preface has been written.
    def started_xml
      @started_xml = true
    end

    # Answer true once the XML preface has been written.
    def started_xml?
      @started_xml
    end

    # Yields each row of the CSV input file to the block.
    def each_csv(&block)
      CSV.foreach(input_pathname) { |csv_row| block.call csv_row }
    end

    # Yields each row of the first sheet of the Excel input file to the block.
    def each_xlsx(&block)
      excel = SimpleXlsxReader.open(input_pathname)
      first_sheet = excel.sheets.first
      $stderr.puts "Excel file was opened and sheet name was found: #{first_sheet.name}"
      first_sheet.rows.each { |sheet_row| block.call sheet_row }
    end

    # Yields each input row to the block, choosing the reader by input type.
    # NOTE(review): google? and each_google are not defined in this file -
    # presumably supplied elsewhere; confirm before relying on that branch.
    def each(&block)
      if csv?
        each_csv(&block)
      elsif xlsx?
        each_xlsx(&block)
      elsif google?
        each_google(&block)
      end
    end

    # Answer true when the input type is :csv.
    def csv?
      input_type == :csv
    end

    # Answer true when the input type is :xlsx or :xls.
    def xlsx?
      [ :xlsx, :xls ].include? input_type
    end

    # Drives the conversion of the input rows to XML formatted output.
    #
    # #start may point $stdout at the output file; this method guarantees that
    # the file is closed and the previous $stdout is put back, even when the
    # conversion raises.
    #
    # @return whatever #finish returns
    def convert
      previous_stdout = $stdout
      start
      each { |input_row| append input_row }
      finish
    ensure
      unless $stdout.equal?(previous_stdout)
        $stdout.close
        $stdout = previous_stdout
      end
    end

    # Prepares for a conversion: optionally reports the I/O settings, redirects
    # $stdout to the output file when one was requested, and resets row state.
    def start
      if VERBOSE_IO_OPTIONS
        $stderr.puts "options: #{options}"
        $stderr.puts "input_pathname: #{input_pathname}, exists: #{File.exist?(input_pathname)}"
        $stderr.puts "output_filename: #{output_filename}" if options[:output_file]
      end
      if options[:output_file]
        output_pathname.mkpath
        $stdout = File.open(output_filename.to_s, 'w')
      end
      @row_num = 0
      @headers = []
      self.setup_state = :setup_xml
    end

    # Closes the XML document, then gives subclasses a chance to use the captured data.
    def finish
      finish_xml
      finish_capture
    end

    # Appends the specified row to the output.
    # @param :row Array of values parsed from the input.
    def append(row)
      @row = row
      @row_num += 1
      sanitize
      output
    end

    # Sanitize the current row of data. This is done in place, so not worried about a return value.
    # Also tracks whether every cell of the row is blank (see #reject?).
    def sanitize
      self.col_num = 0
      self.blank_row = true
      row.collect! do |utf_8|
        value = (ENCODE_VALUES ? "#{utf_8}".encode('iso-8859-1', xml: :text) : utf_8)
        self.blank_row = false if blank_row && !blank_value?(value)
        sanitize_value(value) if headers? && SANITIZE_VALUES
        replacement = substitute_value(value)
        self.col_num += 1
        replacement
      end
    end

    # Substitute field-specific values based on their position in the row.
    # The returned value is substituted for the passed value.
    # This method does not process columns that are not included in the output.
    #
    # Subclasses should not override this method, but should override #get_substitute_value instead.
    #
    # @param :value the value to be substituted
    def substitute_value(value)
      return value unless headers? && output_column?
      get_substitute_value(value)
    end

    # Overridden by subclasses to substitute field-specific values based on their position in the row.
    # The returned value is substituted for the passed value.
    # This method expects only columns that are included in the output.
    #
    # @param :value the value to be substituted
    def get_substitute_value(value)
      value
    end

    # Sanitize field-specific values based on their position in the row.
    # The values must be modified in place, so there is no need to return a value.
    # This method does not sanitize columns that are not included in the output.
    # As written it is a placeholder that leaves the value untouched.
    #
    # @param :value the value to be sanitized
    def sanitize_value(value)
      return unless headers? && output_column?
      case header
      when nil # was `when nil?`, which tested self.nil? instead of matching nil
        0
      end
    end

    # Answer true if the headers are already determined.
    def headers?
      !headers.empty?
    end

    # Starts a new group on the menu board and makes it the current group.
    # The group's id and display order are its 1-based position on the board.
    #
    # @param :name the group name, also used as its key in #menu_board
    def begin_menu_group(name)
      self.menu_group_name = name
      id = menu_board.size + 1
      self.menu_group = { id: id, name: name, display_order: id, choices: [] }
      menu_board[name] = menu_group
    end

    # Starts capturing the current row as a new choice in the current group.
    def capture_row
      # A row can arrive before any group was begun; open one so it has a home
      # instead of failing on a nil group.
      begin_menu_group(menu_group_name) if menu_group.nil?
      order = menu_group[:choices].size + 1
      self.menu_choice = { id: row_num, display_order: order, menu_group_id: menu_group[:id], options: { } }
      self.menu_group[:choices] << menu_choice
    end

    # Stores the current column's value on the current choice, renaming
    # 'ingredients' to 'description' and 'trademarked' to 'is_trademarked'.
    def capture_column
      key = case header
              when 'ingredients'
                'description'
              when 'trademarked'
                'is_trademarked'
              else
                header
            end
      # NOTE(review): 'trademarked' was renamed to 'is_trademarked' above, so it can
      # never match this list - confirm whether 'is_trademarked' was intended here.
      if %w(trademarked image_url).include?(key)
        menu_choice[:options][key.to_sym] = col_value
      else
        menu_choice[key.to_sym] = col_value
      end
    end

    # Hook invoked after the XML document is closed.
    def finish_capture
      # empty - subclass should override
    end

    # Output the current row of data, which was parsed from the input.
    # Until headers exist the row is handed to the current setup state instead.
    def output
      rejected = reject?
      puts "rejected #{row_num}: #{row}" if rejected && VERBOSE
      return if rejected
      if headers.empty?
        send setup_state
      else
        put_row #if location_filter?
      end
    end

    # Answer true if rules dictate the current row should be discarded from processing.
    def reject?
      headers? && blank_row
    end

    # Answer true if the first_col value is the effective date header, and clip the effective date value.
    # Non-string cells (nil, numbers) are compared by their string form rather than raising.
    #
    # @param :first_col the value of the first cell in the current row
    def clip_effective_date?(first_col)
      return false unless first_col.to_s.match(effective_date_pattern)
      self.effective_date = "#{row[1]}".strip
      true
    end

    # Initializes the xml document and transfers setup_state to :setup
    def setup_xml
      start_xml
      self.setup_state = :setup
      send setup_state # automatically transition to next state
    end

    # Initializes the headers on the first row and optionally outputs them when VERBOSE=true.
    def setup
      row.each { |col| headers << map[col] }
      puts "headers: #{row_num}: #{headers}" if VERBOSE
    end

    # Output the XML preface.
    def start_xml
      start_xml_document
      start_xml_schema
      started_xml
    end

    # Output the XML document preface.
    def start_xml_document
      puts '<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>'
    end

    # Output the schema with the root tag.
    def start_xml_schema
      puts begin_tag %Q(#{root_tag} #{schema_uri} effective_date="#{effective_date}")
    end

    # Answer the namespace attribute placed on the root tag.
    def schema_uri
      %q(xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance")
    end

    # Close out the XML file.
    def finish_xml
      puts end_tag root_tag if started_xml?
    end

    # Output the current row, one column at a time.
    def put_row
      puts "#{indent}#{begin_row}"
      capture_row
      row.each_index do |index|
        self.col_num = index
        next unless output_column?
        put_column
        capture_column
      end
      puts "#{indent}#{end_row}"
    end

    # Answer true when the current column should be included in the output.
    def output_column?
      xml_tags.include? header
    end

    # Answer the value for the current column of data, or for the specified index.
    def col_value(index = nil)
      row[index || col_num]
    end

    # Answer the header for the current column of data, or for the specified index.
    def header(index = nil)
      headers[index || col_num]
    end

    # Output the value enclosing it with the specified XML tag.
    # @param :tag the xml tag name
    # @param :value the value to output
    def put_tag(tag, value)
      puts "#{indent 2}#{begin_tag tag}#{value || ''}#{end_tag tag}"
    end

    # Output the current column enclosing it with XML tags.
    def put_column
      puts "#{indent 2}#{begin_tag}#{col_value}#{end_tag}"
    end

    # Answer a begin tag using the current header, or else override it with the sender's tag.
    def begin_tag(h = nil)
      "<#{h || header}>"
    end

    # Answer the end tag using the current header, or else override it with the sender's tag.
    def end_tag(h = nil)
      "</#{h || header}>"
    end

    # Answer the tag defining a row.
    def begin_row
      begin_tag(row_tag)
    end

    # Answer the closing tag for the current row.
    def end_row
      end_tag(row_tag)
    end

    # Answer a string prefix for the specified indent level.
    # @param :level specifies the indent level; default is 1.
    def indent(level = 1)
      "\t" * level
    end

    private

    # Answer true when the cell holds nothing worth outputting. Defers to
    # #blank? when the value responds to it, otherwise treats nil, false and
    # whitespace-only text as blank.
    def blank_value?(value)
      return value.blank? if value.respond_to?(:blank?)
      !value || value.to_s !~ /[^[:space:]]/
    end
  end
end
|
data/lib/data_spork.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'data_spork/version'
|
3
|
+
|
4
|
+
module DataSpork
  # Switches read by the importer while it runs.
  class Importer
    # Gates the importer's diagnostic +puts+ lines (detected headers, rejected rows).
    VERBOSE = false

    # True only when the VERBOSE environment variable is exactly "true";
    # gates the report of options and input/output paths on $stderr.
    VERBOSE_IO_OPTIONS = (ENV['VERBOSE'] == 'true')

    # skip encoding until a need is found
    ENCODE_VALUES = false

    # skip sanitizing until a need is found
    SANITIZE_VALUES = false
  end
end
|
12
|
+
|
13
|
+
require 'data_spork/importer'
|
metadata
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: data_spork
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.3
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Brian Jackson
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-02-05 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rails
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '3.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '3.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: simple_xlsx_reader
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0.9'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0.9'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: google_drive
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.3'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.3'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: bundler
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.3'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.3'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rake
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ~>
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '10.1'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ~>
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '10.1'
|
83
|
+
description: Importer of CSV and Spreadsheet data.
|
84
|
+
email:
|
85
|
+
- bjackson@leadbaxter.com
|
86
|
+
executables: []
|
87
|
+
extensions: []
|
88
|
+
extra_rdoc_files: []
|
89
|
+
files:
|
90
|
+
- lib/data_spork.rb
|
91
|
+
- lib/data_spork/importer.rb
|
92
|
+
- lib/data_spork/version.rb
|
93
|
+
homepage: http://bitbucket.org/leadbaxter/data_spork
|
94
|
+
licenses:
|
95
|
+
- MIT
|
96
|
+
metadata: {}
|
97
|
+
post_install_message:
|
98
|
+
rdoc_options: []
|
99
|
+
require_paths:
|
100
|
+
- lib
|
101
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
102
|
+
requirements:
|
103
|
+
- - ! '>='
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
version: '0'
|
106
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ! '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
requirements: []
|
112
|
+
rubyforge_project:
|
113
|
+
rubygems_version: 2.2.1
|
114
|
+
signing_key:
|
115
|
+
specification_version: 4
|
116
|
+
summary: Import CSV, Excel [.XLS, .XLSX] and Google Drive Spreadsheets.
|
117
|
+
test_files: []
|