data_spork 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ZDBlMDM4OTU4ZDU2N2JjNDdiN2RkY2VjNDE3NDUyYmNkMzU5OTM3Nw==
5
+ data.tar.gz: !binary |-
6
+ OTRjZDVhNTRlMzkzZjJhMjVlOWUwZTk5NzRlZTNhZWMyMzQxZThmYg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ MDIyYTg1ZTkyNDg4MDA5OTJhM2NjOTViNjk3NWQwN2I5YzYxYTM0ZTRhMzcw
10
+ YWNhZTEyNjk0MWUzMGE0ZjQyY2U5NjRkN2Q1N2IwZTY4MWY1MTFiNmRlNWY3
11
+ ZDMwMWQ0Y2QwNDU5NGNhYzA0MGNmZDAyMTNlNmUzMzA0OGQ5ODA=
12
+ data.tar.gz: !binary |-
13
+ Zjc1MjcxNjc1MzhjZDhiNDI5ZGE0ZDJjNTQyYzRjNjkzZmY2YzU3Zjk3MDlk
14
+ ODI5ZTc2YmU4MzcxOTUyNjU4Y2IxMGM1MDA1YmYzZWExYmU4Njc3OTMwMDlk
15
+ YTU4NWI0YTgxNDE1N2E4YWJmZDdiNGYwZGU1NGYyMjAxYjZmZGI=
@@ -0,0 +1,371 @@
1
+ require 'google_drive'
2
+
3
+ module DataSpork
4
+ class Importer
5
+ attr_reader :row_num, :row, :headers, :input_type
6
+ attr_reader :xml_tags, :root_tag, :row_tag, :options
7
+ attr_accessor :col_num, :setup_state, :blank_row
8
+ attr_accessor :effective_date
9
+
10
+ # Entry point to convert the input menu and output it as a menu board.
11
+ #
12
+ # @param :input_type symbol indicating the type of input to read, e.g. :csv or :xlsx
13
+ # @param :options hash with options to control the behavior of the conversion
14
+ def self.convert(input_type, options = nil)
15
+ new(input_type, options).convert
16
+ end
17
+
18
+ # Constructor
19
+ def initialize(input_type, options = nil)
20
+ @input_type = input_type
21
+ init_options options
22
+ @xml_tags = %w(menu_group_name name calories ingredients is_active trademarked is_good_cold )
23
+ @root_tag = 'MenuBoard'
24
+ @row_tag = 'menu_choice'
25
+ @location_filters = {
26
+ default: nil
27
+ }
28
+ @started_xml = false
29
+ @menu_board = {}
30
+ end
31
+
32
+ def init_options(options)
33
+ @options = { source_path: '.' }.merge(options ||= {})
34
+ @options[:output_path] = @options[:source_path] if @options[:output_path].nil? and @options[:output_file]
35
+ end
36
+
37
+ def effective_date_pattern
38
+ /^[Ee]ffective [Dd]ate+/
39
+ end
40
+
41
+ def map
42
+ {} # subclass should override
43
+ end
44
+
45
+ def source_name
46
+ base = DEFAULT_INPUT_NAME
47
+ modifier = ''
48
+ "#{base}#{modifier}.#{input_type}"
49
+ end
50
+
51
+ def input_pathname
52
+ Pathname(options[:source_path]).join(source_name).to_s
53
+ end
54
+
55
+ def output_pathname
56
+ Pathname(options[:output_path]).join('output')
57
+ end
58
+
59
+ def output_filename
60
+ p = output_pathname.join(options[:output_file])
61
+ p.sub_ext "#{file_modifier}#{p.extname}"
62
+ end
63
+
64
+ def file_modifier
65
+ ''
66
+ end
67
+
68
+ def started_xml
69
+ @started_xml = true
70
+ end
71
+
72
+ def started_xml?
73
+ @started_xml
74
+ end
75
+
76
+ def each_csv(&block)
77
+ CSV.foreach(input_pathname) do |row|
78
+ block.call row
79
+ end
80
+ end
81
+
82
+ def each_xlsx(&block)
83
+ excel = SimpleXlsxReader.open(input_pathname)
84
+ $stderr.puts "Excel file was opened and sheet name was found: #{excel.sheets.first.name}"
85
+ excel.sheets.first.rows.each do |row|
86
+ block.call row
87
+ end
88
+ end
89
+
90
+ def each(&block)
91
+ if csv?
92
+ each_csv &block
93
+ elsif xlsx?
94
+ each_xlsx &block
95
+ elsif google?
96
+ each_google &block
97
+ end
98
+ end
99
+
100
+ def csv?
101
+ input_type == :csv
102
+ end
103
+
104
+ def xlsx?
105
+ [ :xlsx, :xls ].include? input_type
106
+ end
107
+
108
+ # Drives the conversion of the input rows (CSV, Excel, or Google spreadsheet) to XML formatted output.
109
+ # Takes no parameters; rows are read via #each according to input_type.
110
+ def convert
111
+ start
112
+ each do |row|
113
+ append row
114
+ end
115
+ finish
116
+ end
117
+
118
+ def start
119
+ if VERBOSE_IO_OPTIONS
120
+ $stderr.puts "options: #{options}"
121
+ $stderr.puts "input_pathname: #{input_pathname}, exists: #{File.exist?(input_pathname)}"
122
+ $stderr.puts "output_filename: #{output_filename}" if options[:output_file]
123
+ end
124
+ if options[:output_file]
125
+ output_pathname.mkpath
126
+ $stdout = File.open("#{output_filename}", 'w')
127
+ end
128
+ @row_num = 0
129
+ @headers = []
130
+ self.setup_state = :setup_xml
131
+ end
132
+
133
+ def finish
134
+ finish_xml
135
+ finish_capture
136
+ end
137
+
138
+ # Appends the specified row to the output.
139
+ # @param :row Array of values parsed from the CSV input.
140
+ def append(row)
141
+ @row = row
142
+ @row_num += 1
143
+ sanitize
144
+ output
145
+ end
146
+
147
+ # Sanitize the current row of data. This is done in place, so not worried about a return value.
148
+ def sanitize
149
+ self.col_num = 0
150
+ self.blank_row = true
151
+ row.collect! do |utf_8|
152
+ value = (ENCODE_VALUES ? "#{utf_8}".encode('iso-8859-1', xml: :text) : utf_8)
153
+ self.blank_row = false if blank_row and !value.blank?
154
+ sanitize_value(value) if headers? and SANITIZE_VALUES
155
+ substitute_value(value).tap do
156
+ self.col_num += 1
157
+ end
158
+ end
159
+ end
160
+
161
+ # Substitute field-specific values based on their position in the row.
162
+ # The returned value is substituted for the passed value.
163
+ # This method does not process columns that are not included in the output.
164
+ #
165
+ # Subclasses should not override this method, but should override #get_substitute_value instead.
166
+ #
167
+ # @param :value the value to be substituted
168
+ def substitute_value(value)
169
+ if headers? and output_column?
170
+ get_substitute_value(value)
171
+ else
172
+ value
173
+ end
174
+ end
175
+
176
+ # Overridden by subclasses to substitute field-specific values based on their position in the row.
177
+ # The returned value is substituted for the passed value.
178
+ # This method expects only columns that are included in the output.
179
+ #
180
+ # @param :value the value to be substituted
181
+ def get_substitute_value(value)
182
+ value
183
+ end
184
+
185
+ # Sanitize field-specific values based on their position in the row.
186
+ # The values must be modified in place, so there is no need to return a value.
187
+ # This method does not sanitize columns that are not included in the output.
188
+ #
189
+ # @param :value the value to be sanitized
190
+ def sanitize_value(value)
191
+ if headers? and output_column?
192
+ case header
193
+ when nil?
194
+ 0
195
+ end
196
+ end
197
+ end
198
+
199
+ # Answer true if the headers are already determined.
200
+ def headers?
201
+ !headers.empty?
202
+ end
203
+
204
+ def begin_menu_group(name)
205
+ self.menu_group_name = name
206
+ id = menu_board.size + 1
207
+ self.menu_group = { id: id, name: name, display_order: id, choices: [] }
208
+ menu_board[name] = menu_group
209
+ end
210
+
211
+ def capture_row
212
+ order = menu_group[:choices].size + 1
213
+ self.menu_choice = { id: row_num, display_order: order, menu_group_id: menu_group[:id], options: { } }
214
+ self.menu_group[:choices] << menu_choice
215
+ end
216
+
217
+ def capture_column
218
+ key = case header
219
+ when 'ingredients'
220
+ 'description'
221
+ when 'trademarked'
222
+ 'is_trademarked'
223
+ else
224
+ header
225
+ end
226
+ if %w(trademarked image_url).include?(key)
227
+ menu_choice[:options][key.to_sym] = col_value
228
+ else
229
+ menu_choice[key.to_sym] = col_value
230
+ end
231
+ end
232
+
233
+ def finish_capture
234
+ # empty - subclass should override
235
+ end
236
+
237
+ # Output the current row of data, which were parsed from the CSV input.
238
+ def output
239
+ unless reject?.tap { |r| puts "rejected #{row_num}: #{row}" if r and VERBOSE }
240
+ if headers.empty?
241
+ send setup_state
242
+ else
243
+ put_row #if location_filter?
244
+ end
245
+ end
246
+ end
247
+
248
+ # Answer true if rules dictate the current row should be discarded from processing.
249
+ def reject?
250
+ headers? and blank_row
251
+ end
252
+
253
+ # Answer true if the first_col value is the effective date header, and clip the effective date value.
254
+ def clip_effective_date?(first_col)
255
+ if first_col.match(effective_date_pattern)
256
+ self.effective_date = "#{row[1]}".strip
257
+ true
258
+ else
259
+ false
260
+ end
261
+ end
262
+
263
+ # Initializes the xml document and transfers setup_state to :setup
264
+ def setup_xml
265
+ start_xml
266
+ self.setup_state = :setup
267
+ send setup_state # automatically transition to next state
268
+ end
269
+
270
+ # Initializes the headers on the first row and optionally outputs them when VERBOSE=true.
271
+ def setup
272
+ row.each do |col|
273
+ headers << map[col]
274
+ end
275
+ puts "headers: #{row_num}: #{headers}" if VERBOSE
276
+ end
277
+
278
+ # Output the XML preface.
279
+ def start_xml
280
+ start_xml_document
281
+ start_xml_schema
282
+ started_xml
283
+ end
284
+
285
+ # Output the XML document preface.
286
+ def start_xml_document
287
+ puts '<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>'
288
+ end
289
+
290
+ # Output the schema with the root tag.
291
+ def start_xml_schema
292
+ puts begin_tag %Q(#{root_tag} #{schema_uri} effective_date="#{effective_date}")
293
+ end
294
+
295
+ def schema_uri
296
+ %q(xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance")
297
+ end
298
+
299
+ # Close out the XML file.
300
+ def finish_xml
301
+ puts end_tag root_tag if started_xml?
302
+ end
303
+
304
+ # Output the current row, one column at a time.
305
+ def put_row
306
+ puts "#{indent}#{begin_row}"
307
+ capture_row
308
+ row.each_index do |index|
309
+ self.col_num = index
310
+ if output_column?
311
+ put_column
312
+ capture_column
313
+ end
314
+ end
315
+ puts "#{indent}#{end_row}"
316
+ end
317
+
318
+ # Answer true when the current column should be included in the output.
319
+ def output_column?
320
+ xml_tags.include? header
321
+ end
322
+
323
+ # Answer the value for the current column of data, or for the specified index.
324
+ def col_value(index = nil)
325
+ row[index || col_num]
326
+ end
327
+
328
+ # Answer the header for the current column of data, or for the specified index.
329
+ def header(index = nil)
330
+ headers[index || col_num]
331
+ end
332
+
333
+ # Output the value enclosing it with the specified XML tag.
334
+ # @param :tag the xml tag name
335
+ # @param :value the value to output
336
+ def put_tag(tag, value)
337
+ puts "#{indent 2}#{begin_tag tag}#{value || ''}#{end_tag tag}"
338
+ end
339
+
340
+ # Output the current column enclosing it with XML tags.
341
+ def put_column
342
+ puts "#{indent 2}#{begin_tag}#{col_value}#{end_tag}"
343
+ end
344
+
345
+ # Answer a begin tag using the current header, or else override it with the sender's tag.
346
+ def begin_tag(h = nil)
347
+ "<#{h || header}>"
348
+ end
349
+
350
+ # Answer the end tag using the current header, or else override it with the sender's tag.
351
+ def end_tag(h = nil)
352
+ "</#{h || header}>"
353
+ end
354
+
355
+ # Answer the tag defining a row.
356
+ def begin_row
357
+ begin_tag(row_tag)
358
+ end
359
+
360
+ # Answer the closing tag for the current row.
361
+ def end_row
362
+ end_tag(row_tag)
363
+ end
364
+
365
+ # Answer a string prefix for the specified indent level.
366
+ # @param :level specifies the indent level; default is 1.
367
+ def indent(level = 1)
368
+ "\t" * level
369
+ end
370
+ end
371
+ end
@@ -0,0 +1,3 @@
1
+ module DataSpork
2
+ VERSION = "0.0.3"
3
+ end
data/lib/data_spork.rb ADDED
@@ -0,0 +1,13 @@
1
+ require 'csv'
2
+ require 'data_spork/version'
3
+
4
+ module DataSpork
5
+ class Importer
6
+ VERBOSE = false
7
+ VERBOSE_IO_OPTIONS = ENV['VERBOSE'].eql?('true')
8
+ ENCODE_VALUES = false # skip encoding until a need is found
9
+ SANITIZE_VALUES = false # skip sanitizing until a need is found
10
+ end
11
+ end
12
+
13
+ require 'data_spork/importer'
metadata ADDED
@@ -0,0 +1,117 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: data_spork
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ platform: ruby
6
+ authors:
7
+ - Brian Jackson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-02-05 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rails
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '3.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '3.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: simple_xlsx_reader
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '0.9'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '0.9'
41
+ - !ruby/object:Gem::Dependency
42
+ name: google_drive
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '0.3'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '0.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '1.3'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: '1.3'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: '10.1'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ~>
81
+ - !ruby/object:Gem::Version
82
+ version: '10.1'
83
+ description: Importer of CSV and Spreadsheet data.
84
+ email:
85
+ - bjackson@leadbaxter.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - lib/data_spork.rb
91
+ - lib/data_spork/importer.rb
92
+ - lib/data_spork/version.rb
93
+ homepage: http://bitbucket.org/leadbaxter/data_spork
94
+ licenses:
95
+ - MIT
96
+ metadata: {}
97
+ post_install_message:
98
+ rdoc_options: []
99
+ require_paths:
100
+ - lib
101
+ required_ruby_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ! '>='
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ required_rubygems_version: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ requirements: []
112
+ rubyforge_project:
113
+ rubygems_version: 2.2.1
114
+ signing_key:
115
+ specification_version: 4
116
+ summary: Import CSV, Excel [.XLS, .XLSX] and Google Drive Spreadsheets.
117
+ test_files: []