data_spork 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/data_spork/base_reader.rb +30 -0
- data/lib/data_spork/google_spreadsheet.rb +66 -0
- data/lib/data_spork/importer.rb +43 -125
- data/lib/data_spork/version.rb +1 -1
- data/lib/data_spork/xml_writer.rb +93 -0
- data/lib/data_spork.rb +0 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
OWE4Y2U5ODQyMzI2YmQzYmJhYTdkY2IxYTBjNGMxMjliZWQxYjVlMA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
YjgwOWVjMmQ4NjRhODk4ZTAxNWI1NDVlZDMyNmQ5YTY0YzU1MWVhNw==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZTZmOWRmYzY3ZTE2YWU3YjA0ZGM4ZDAyY2M5YzJiMzJiODg3NmE2ZjViZTM5
|
10
|
+
M2FmYjMwOGFmYTNiNTI4NGE0MzNiNmNjOThmYmU5N2E0NDI5YzQ3MjFlMDgx
|
11
|
+
ZWI3MjUwM2E0YTE3NjRhYmZiODYzMWM4NmRmZDBmOTQ1YjZiNmU=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YzNjZTUzN2JjNWQzYTE4Y2Q2ZjY3YmI2ZWYzMTJmYmYyNGQ2OWQyN2ViY2Qy
|
14
|
+
MDY3ODBjNTIyZWE0MDI3N2Q5OWEzYzYzYmVkMzNkZWI4Yjg1NTA2MzgxMGM5
|
15
|
+
NTQyYzJkMzJkZGUyZjBkZGJjNjE3NDkzZGYyOWRkOWI1NTRmMDQ=
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
module DataSpork
|
4
|
+
class BaseReader
|
5
|
+
attr_reader :owner
|
6
|
+
delegate :options, :input_pathname, :to => :owner
|
7
|
+
|
8
|
+
def initialize(owner)
|
9
|
+
@owner = owner
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
class CSV_Reader < BaseReader
|
14
|
+
def each(&block)
|
15
|
+
CSV.foreach(input_pathname) do |row|
|
16
|
+
block.call row
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
class XLSX_Reader < BaseReader
|
22
|
+
def each(&block)
|
23
|
+
excel = SimpleXlsxReader.open(input_pathname)
|
24
|
+
print_error "Excel file was opened and sheet name was found: #{excel.sheets.first.name}"
|
25
|
+
excel.sheets.first.rows.each do |row|
|
26
|
+
block.call row
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'google_drive'
|
2
|
+
|
3
|
+
module DataSpork
|
4
|
+
class Importer::GoogleSpreadsheet < Importer
|
5
|
+
|
6
|
+
def init_options(options)
|
7
|
+
super
|
8
|
+
unless @options[:google].present?
|
9
|
+
@options[:google] = {
|
10
|
+
user: ENV['google_user'],
|
11
|
+
password: ENV['google_pwd']
|
12
|
+
}
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def reader
|
17
|
+
if google?
|
18
|
+
Reader.new(self)
|
19
|
+
else
|
20
|
+
super
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def google?
|
25
|
+
[ :google, :drive ].include?(input_type)
|
26
|
+
end
|
27
|
+
|
28
|
+
def google
|
29
|
+
options[:google]
|
30
|
+
end
|
31
|
+
|
32
|
+
def google_user
|
33
|
+
google[:user]
|
34
|
+
end
|
35
|
+
|
36
|
+
def google_password
|
37
|
+
google[:password]
|
38
|
+
end
|
39
|
+
|
40
|
+
def spreadsheet_title
|
41
|
+
google[:spreadsheet_title]
|
42
|
+
end
|
43
|
+
|
44
|
+
def worksheet_title
|
45
|
+
google[:worksheet_title]
|
46
|
+
end
|
47
|
+
|
48
|
+
class Reader < DataSpork::BaseReader
|
49
|
+
delegate :print, :print_error, :to => :owner
|
50
|
+
delegate :google_user, :google_password, :spreadsheet_title, :worksheet_title, :to => :owner
|
51
|
+
|
52
|
+
def each(&block)
|
53
|
+
google = GoogleDrive.login(google_user, google_password)
|
54
|
+
print_error "GoogleDrive #{google} login for #{google_user} #{google.present? ? 'succeeded' : 'failed'}."
|
55
|
+
print_error "GoogleDrive opening spreadsheet: #{spreadsheet_title}, worksheet: #{worksheet_title}."
|
56
|
+
spreadsheet = google.spreadsheet_by_title(spreadsheet_title)
|
57
|
+
sheet = spreadsheet.worksheet_by_title(worksheet_title)
|
58
|
+
print_error "GoogleDrive was opened and sheet name was found: #{sheet.title}"
|
59
|
+
sheet.rows.each do |row|
|
60
|
+
block.call(row.collect {|value| value })
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
end
|
data/lib/data_spork/importer.rb
CHANGED
@@ -1,9 +1,13 @@
|
|
1
|
-
require '
|
1
|
+
require 'data_spork/base_reader'
|
2
|
+
require 'data_spork/xml_writer'
|
3
|
+
require 'data_spork/google_spreadsheet'
|
2
4
|
|
3
5
|
module DataSpork
|
6
|
+
|
4
7
|
class Importer
|
5
|
-
attr_reader :
|
6
|
-
attr_reader :
|
8
|
+
attr_reader :headers, :input_type, :options, :writer
|
9
|
+
attr_reader :row_num, :row, :col_map
|
10
|
+
attr_reader :root_tag, :row_tag, :xml_tags
|
7
11
|
attr_accessor :col_num, :setup_state, :blank_row
|
8
12
|
attr_accessor :effective_date
|
9
13
|
|
@@ -12,21 +16,14 @@ module DataSpork
|
|
12
16
|
# @param :input_type symbol indicating whether to output :xlsx or :csv
|
13
17
|
# @param :options hash with options to control the behavior of the conversion
|
14
18
|
def self.convert(input_type, options = nil)
|
15
|
-
new(input_type, options).convert
|
19
|
+
self.new(input_type, options).convert
|
16
20
|
end
|
17
21
|
|
18
22
|
# Constructor
|
19
23
|
def initialize(input_type, options = nil)
|
20
24
|
@input_type = input_type
|
21
25
|
init_options options
|
22
|
-
@
|
23
|
-
@root_tag = 'MenuBoard'
|
24
|
-
@row_tag = 'menu_choice'
|
25
|
-
@location_filters = {
|
26
|
-
default: nil
|
27
|
-
}
|
28
|
-
@started_xml = false
|
29
|
-
@menu_board = {}
|
26
|
+
@writer = XmlWriter.new(self)
|
30
27
|
end
|
31
28
|
|
32
29
|
def init_options(options)
|
@@ -38,8 +35,12 @@ module DataSpork
|
|
38
35
|
/^[Ee]ffective [Dd]ate+/
|
39
36
|
end
|
40
37
|
|
41
|
-
def
|
42
|
-
|
38
|
+
def print(str)
|
39
|
+
puts str
|
40
|
+
end
|
41
|
+
|
42
|
+
def print_error(str)
|
43
|
+
$stderr.puts str
|
43
44
|
end
|
44
45
|
|
45
46
|
def source_name
|
@@ -65,36 +66,16 @@ module DataSpork
|
|
65
66
|
''
|
66
67
|
end
|
67
68
|
|
68
|
-
def
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
@started_xml
|
74
|
-
end
|
75
|
-
|
76
|
-
def each_csv(&block)
|
77
|
-
CSV.foreach(input_pathname) do |row|
|
78
|
-
block.call row
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
def each_xlsx(&block)
|
83
|
-
excel = SimpleXlsxReader.open(input_pathname)
|
84
|
-
$stderr.puts "Excel file was opened and sheet name was found: #{excel.sheets.first.name}"
|
85
|
-
excel.sheets.first.rows.each do |row|
|
86
|
-
block.call row
|
69
|
+
def reader
|
70
|
+
if csv?
|
71
|
+
CSV_Reader.new(self)
|
72
|
+
elsif xlsx?
|
73
|
+
XLSX_Reader.new(self)
|
87
74
|
end
|
88
75
|
end
|
89
76
|
|
90
77
|
def each(&block)
|
91
|
-
|
92
|
-
each_csv &block
|
93
|
-
elsif xlsx?
|
94
|
-
each_xlsx &block
|
95
|
-
elsif google?
|
96
|
-
each_google &block
|
97
|
-
end
|
78
|
+
reader.each &block
|
98
79
|
end
|
99
80
|
|
100
81
|
def csv?
|
@@ -117,9 +98,9 @@ module DataSpork
|
|
117
98
|
|
118
99
|
def start
|
119
100
|
if VERBOSE_IO_OPTIONS
|
120
|
-
|
121
|
-
|
122
|
-
|
101
|
+
print_error "options: #{options}"
|
102
|
+
print_error "input_pathname: #{input_pathname}, exists: #{File.exist?(input_pathname)}"
|
103
|
+
print_error "output_filename: #{output_filename}" if options[:output_file]
|
123
104
|
end
|
124
105
|
if options[:output_file]
|
125
106
|
output_pathname.mkpath
|
@@ -127,11 +108,11 @@ module DataSpork
|
|
127
108
|
end
|
128
109
|
@row_num = 0
|
129
110
|
@headers = []
|
130
|
-
self.setup_state = :
|
111
|
+
self.setup_state = :setup_writer
|
131
112
|
end
|
132
113
|
|
133
114
|
def finish
|
134
|
-
|
115
|
+
writer.finish
|
135
116
|
finish_capture
|
136
117
|
end
|
137
118
|
|
@@ -236,7 +217,7 @@ module DataSpork
|
|
236
217
|
|
237
218
|
# Output the current row of data, which were parsed from the CSV input.
|
238
219
|
def output
|
239
|
-
unless reject?.tap { |r|
|
220
|
+
unless reject?.tap { |r| print "rejected #{row_num}: #{row}" if r and VERBOSE }
|
240
221
|
if headers.empty?
|
241
222
|
send setup_state
|
242
223
|
else
|
@@ -261,8 +242,8 @@ module DataSpork
|
|
261
242
|
end
|
262
243
|
|
263
244
|
# Initializes the xml document and transfers setup_state to :setup
|
264
|
-
def
|
265
|
-
|
245
|
+
def setup_writer
|
246
|
+
writer.start
|
266
247
|
self.setup_state = :setup
|
267
248
|
send setup_state # automatically transition to next state
|
268
249
|
end
|
@@ -270,102 +251,39 @@ module DataSpork
|
|
270
251
|
# Initializes the headers on the first row and optionally outputs them when VERBOSE=true.
|
271
252
|
def setup
|
272
253
|
row.each do |col|
|
273
|
-
headers <<
|
254
|
+
headers << col_map[col]
|
274
255
|
end
|
275
|
-
|
276
|
-
end
|
277
|
-
|
278
|
-
# Output the XML preface.
|
279
|
-
def start_xml
|
280
|
-
start_xml_document
|
281
|
-
start_xml_schema
|
282
|
-
started_xml
|
256
|
+
print "headers: #{row_num}: #{headers}" if VERBOSE
|
283
257
|
end
|
284
258
|
|
285
|
-
#
|
286
|
-
def
|
287
|
-
|
288
|
-
end
|
289
|
-
|
290
|
-
# Output the schema with the root tag.
|
291
|
-
def start_xml_schema
|
292
|
-
puts begin_tag %Q(#{root_tag} #{schema_uri} effective_date="#{effective_date}")
|
259
|
+
# Answer the value for the current column of data, or for the specified index.
|
260
|
+
def col_value(index = nil)
|
261
|
+
row[index || col_num]
|
293
262
|
end
|
294
263
|
|
295
|
-
|
296
|
-
|
264
|
+
# Answer the header for the current column of data, or for the specified index.
|
265
|
+
def header(index = nil)
|
266
|
+
headers[index || col_num]
|
297
267
|
end
|
298
268
|
|
299
|
-
#
|
300
|
-
def
|
301
|
-
|
269
|
+
# Answer true when the current column should be included in the output.
|
270
|
+
def output_column?
|
271
|
+
xml_tags.include? header
|
302
272
|
end
|
303
273
|
|
304
274
|
# Output the current row, one column at a time.
|
305
275
|
def put_row
|
306
|
-
|
276
|
+
writer.begin_put_row
|
307
277
|
capture_row
|
308
278
|
row.each_index do |index|
|
309
279
|
self.col_num = index
|
310
280
|
if output_column?
|
311
|
-
put_column
|
281
|
+
writer.put_column
|
312
282
|
capture_column
|
313
283
|
end
|
314
284
|
end
|
315
|
-
|
285
|
+
writer.end_put_row
|
316
286
|
end
|
317
287
|
|
318
|
-
# Answer true when the current column should be included in the output.
|
319
|
-
def output_column?
|
320
|
-
xml_tags.include? header
|
321
|
-
end
|
322
|
-
|
323
|
-
# Answer the value for the current column of data, or for the specified index.
|
324
|
-
def col_value(index = nil)
|
325
|
-
row[index || col_num]
|
326
|
-
end
|
327
|
-
|
328
|
-
# Answer the header for the current column of data, or for the specified index.
|
329
|
-
def header(index = nil)
|
330
|
-
headers[index || col_num]
|
331
|
-
end
|
332
|
-
|
333
|
-
# Output the value enclosing it with the specified XML tag.
|
334
|
-
# @param :tag the xml tag name
|
335
|
-
# @param :value the value to output
|
336
|
-
def put_tag(tag, value)
|
337
|
-
puts "#{indent 2}#{begin_tag tag}#{value || ''}#{end_tag tag}"
|
338
|
-
end
|
339
|
-
|
340
|
-
# Output the current column enclosing it with XML tags.
|
341
|
-
def put_column
|
342
|
-
puts "#{indent 2}#{begin_tag}#{col_value}#{end_tag}"
|
343
|
-
end
|
344
|
-
|
345
|
-
# Answer a begin tag using the current header, or else override it with the sender's tag.
|
346
|
-
def begin_tag(h = nil)
|
347
|
-
"<#{h || header}>"
|
348
|
-
end
|
349
|
-
|
350
|
-
# Answer the end tag using the current header, or else override it with the sender's tag.
|
351
|
-
def end_tag(h = nil)
|
352
|
-
"</#{h || header}>"
|
353
|
-
end
|
354
|
-
|
355
|
-
# Answer the tag defining a row.
|
356
|
-
def begin_row
|
357
|
-
begin_tag(row_tag)
|
358
|
-
end
|
359
|
-
|
360
|
-
# Answer the closing tag for the current row.
|
361
|
-
def end_row
|
362
|
-
end_tag(row_tag)
|
363
|
-
end
|
364
|
-
|
365
|
-
# Answer a string prefix for the specified indent level.
|
366
|
-
# @param :level specifies the indent level; default is 1.
|
367
|
-
def indent(level = 1)
|
368
|
-
"\t" * level
|
369
|
-
end
|
370
288
|
end
|
371
289
|
end
|
data/lib/data_spork/version.rb
CHANGED
@@ -0,0 +1,93 @@
|
|
1
|
+
class DataSpork::XmlWriter
|
2
|
+
|
3
|
+
attr_accessor :started
|
4
|
+
attr_reader :owner
|
5
|
+
|
6
|
+
delegate :print, :print_error, :to => :owner
|
7
|
+
delegate :effective_date, :xml_tags, :root_tag, :row_tag, :header, :col_value, :to => :owner
|
8
|
+
|
9
|
+
def initialize(owner)
|
10
|
+
@owner = owner
|
11
|
+
@started = false
|
12
|
+
end
|
13
|
+
|
14
|
+
# Output the XML preface.
|
15
|
+
def start
|
16
|
+
start_document
|
17
|
+
start_schema
|
18
|
+
self.started = true
|
19
|
+
end
|
20
|
+
|
21
|
+
# Output the XML document preface.
|
22
|
+
def start_document
|
23
|
+
print '<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>'
|
24
|
+
end
|
25
|
+
|
26
|
+
# Output the schema with the root tag.
|
27
|
+
def start_schema
|
28
|
+
if owner.respond_to?(:xml_start_schema)
|
29
|
+
print owner.xml_start_schema(self)
|
30
|
+
else
|
31
|
+
print begin_tag %Q(#{root_tag} #{schema_uri} effective_date="#{effective_date}")
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def schema_uri
|
36
|
+
%q(xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance")
|
37
|
+
end
|
38
|
+
|
39
|
+
def started?
|
40
|
+
started
|
41
|
+
end
|
42
|
+
|
43
|
+
# Close out the XML document.
|
44
|
+
def finish
|
45
|
+
print end_tag root_tag if started?
|
46
|
+
end
|
47
|
+
|
48
|
+
def begin_put_row
|
49
|
+
print "#{indent}#{begin_row}"
|
50
|
+
end
|
51
|
+
|
52
|
+
def end_put_row
|
53
|
+
print "#{indent}#{end_row}"
|
54
|
+
end
|
55
|
+
|
56
|
+
# Output the value enclosing it with the specified XML tag.
|
57
|
+
# @param :tag the xml tag name
|
58
|
+
# @param :value the value to output
|
59
|
+
def put_tag(tag, value)
|
60
|
+
print "#{indent 2}#{begin_tag tag}#{value || ''}#{end_tag tag}"
|
61
|
+
end
|
62
|
+
|
63
|
+
# Output the current column enclosing it with XML tags.
|
64
|
+
def put_column
|
65
|
+
print "#{indent 2}#{begin_tag}#{col_value}#{end_tag}"
|
66
|
+
end
|
67
|
+
|
68
|
+
# Answer a begin tag using the current header, or else override it with the sender's tag.
|
69
|
+
def begin_tag(h = nil)
|
70
|
+
"<#{h || header}>"
|
71
|
+
end
|
72
|
+
|
73
|
+
# Answer the end tag using the current header, or else override it with the sender's tag.
|
74
|
+
def end_tag(h = nil)
|
75
|
+
"</#{h || header}>"
|
76
|
+
end
|
77
|
+
|
78
|
+
# Answer the tag defining a row.
|
79
|
+
def begin_row
|
80
|
+
begin_tag(row_tag)
|
81
|
+
end
|
82
|
+
|
83
|
+
# Answer the closing tag for the current row.
|
84
|
+
def end_row
|
85
|
+
end_tag(row_tag)
|
86
|
+
end
|
87
|
+
|
88
|
+
# Answer a string prefix for the specified indent level.
|
89
|
+
# @param :level specifies the indent level; default is 1.
|
90
|
+
def indent(level = 1)
|
91
|
+
"\t" * level
|
92
|
+
end
|
93
|
+
end
|
data/lib/data_spork.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_spork
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brian Jackson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -88,8 +88,11 @@ extensions: []
|
|
88
88
|
extra_rdoc_files: []
|
89
89
|
files:
|
90
90
|
- lib/data_spork.rb
|
91
|
+
- lib/data_spork/base_reader.rb
|
92
|
+
- lib/data_spork/google_spreadsheet.rb
|
91
93
|
- lib/data_spork/importer.rb
|
92
94
|
- lib/data_spork/version.rb
|
95
|
+
- lib/data_spork/xml_writer.rb
|
93
96
|
homepage: http://bitbucket.org/leadbaxter/data_spork
|
94
97
|
licenses:
|
95
98
|
- MIT
|