remote_table 0.2.32 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. data/CHANGELOG +5 -0
  2. data/Gemfile +4 -0
  3. data/Gemfile.lock +65 -0
  4. data/LICENSE +1 -1
  5. data/README.rdoc +21 -7
  6. data/Rakefile +12 -61
  7. data/lib/remote_table/cleaner.rb +19 -0
  8. data/lib/remote_table/executor.rb +29 -0
  9. data/lib/remote_table/format/delimited.rb +62 -0
  10. data/lib/remote_table/format/excel.rb +10 -0
  11. data/lib/remote_table/format/excelx.rb +10 -0
  12. data/lib/remote_table/format/fixed_width.rb +47 -0
  13. data/lib/remote_table/format/html.rb +43 -0
  14. data/lib/remote_table/format/mixins/rooable.rb +47 -0
  15. data/lib/remote_table/format/mixins/textual.rb +34 -0
  16. data/lib/remote_table/format/open_office.rb +10 -0
  17. data/lib/remote_table/format.rb +35 -0
  18. data/lib/remote_table/hasher.rb +25 -0
  19. data/lib/remote_table/local_file.rb +92 -0
  20. data/lib/remote_table/properties.rb +209 -0
  21. data/lib/remote_table/transformer.rb +17 -0
  22. data/lib/remote_table/version.rb +3 -0
  23. data/lib/remote_table.rb +91 -99
  24. data/remote_table.gemspec +32 -77
  25. data/test/{test_helper.rb → helper.rb} +9 -2
  26. data/test/test_big.rb +61 -0
  27. data/test/test_errata.rb +46 -0
  28. data/test/test_old_syntax.rb +229 -0
  29. data/test/test_old_transform.rb +49 -0
  30. data/test/test_remote_table.rb +13 -0
  31. metadata +176 -53
  32. data/VERSION +0 -1
  33. data/lib/remote_table/file/csv.rb +0 -49
  34. data/lib/remote_table/file/fixed_width.rb +0 -19
  35. data/lib/remote_table/file/html.rb +0 -37
  36. data/lib/remote_table/file/ods.rb +0 -11
  37. data/lib/remote_table/file/roo_spreadsheet.rb +0 -44
  38. data/lib/remote_table/file/xls.rb +0 -11
  39. data/lib/remote_table/file/xlsx.rb +0 -11
  40. data/lib/remote_table/file.rb +0 -100
  41. data/lib/remote_table/package.rb +0 -89
  42. data/lib/remote_table/request.rb +0 -44
  43. data/lib/remote_table/transform.rb +0 -58
  44. data/test/remote_table_test.rb +0 -386
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remote_table
3
3
  version: !ruby/object:Gem::Version
4
- hash: 87
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
+ - 1
7
8
  - 0
8
- - 2
9
- - 32
10
- version: 0.2.32
9
+ - 0
10
+ version: 1.0.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Seamus Abshere
@@ -16,39 +16,38 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2010-10-07 00:00:00 -05:00
19
+ date: 2011-01-24 00:00:00 -06:00
20
20
  default_executable:
21
21
  dependencies:
22
22
  - !ruby/object:Gem::Dependency
23
- name: roo
23
+ name: activesupport
24
24
  prerelease: false
25
25
  requirement: &id001 !ruby/object:Gem::Requirement
26
26
  none: false
27
27
  requirements:
28
- - - "="
28
+ - - ">="
29
29
  - !ruby/object:Gem::Version
30
- hash: 13
30
+ hash: 11
31
31
  segments:
32
- - 1
32
+ - 2
33
33
  - 3
34
- - 11
35
- version: 1.3.11
34
+ - 4
35
+ version: 2.3.4
36
36
  type: :runtime
37
37
  version_requirements: *id001
38
38
  - !ruby/object:Gem::Dependency
39
- name: activesupport
39
+ name: roo
40
40
  prerelease: false
41
41
  requirement: &id002 !ruby/object:Gem::Requirement
42
42
  none: false
43
43
  requirements:
44
- - - ">="
44
+ - - ~>
45
45
  - !ruby/object:Gem::Version
46
- hash: 11
46
+ hash: 29
47
47
  segments:
48
- - 2
49
- - 3
50
- - 4
51
- version: 2.3.4
48
+ - 1
49
+ - 9
50
+ version: "1.9"
52
51
  type: :runtime
53
52
  version_requirements: *id002
54
53
  - !ruby/object:Gem::Dependency
@@ -68,9 +67,51 @@ dependencies:
68
67
  type: :runtime
69
68
  version_requirements: *id003
70
69
  - !ruby/object:Gem::Dependency
71
- name: nokogiri
70
+ name: i18n
72
71
  prerelease: false
73
72
  requirement: &id004 !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ hash: 3
78
+ segments:
79
+ - 0
80
+ version: "0"
81
+ type: :runtime
82
+ version_requirements: *id004
83
+ - !ruby/object:Gem::Dependency
84
+ name: builder
85
+ prerelease: false
86
+ requirement: &id005 !ruby/object:Gem::Requirement
87
+ none: false
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ hash: 3
92
+ segments:
93
+ - 0
94
+ version: "0"
95
+ type: :runtime
96
+ version_requirements: *id005
97
+ - !ruby/object:Gem::Dependency
98
+ name: zip
99
+ prerelease: false
100
+ requirement: &id006 !ruby/object:Gem::Requirement
101
+ none: false
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ hash: 3
106
+ segments:
107
+ - 0
108
+ version: "0"
109
+ type: :runtime
110
+ version_requirements: *id006
111
+ - !ruby/object:Gem::Dependency
112
+ name: nokogiri
113
+ prerelease: false
114
+ requirement: &id007 !ruby/object:Gem::Requirement
74
115
  none: false
75
116
  requirements:
76
117
  - - ">="
@@ -82,11 +123,39 @@ dependencies:
82
123
  - 1
83
124
  version: 1.4.1
84
125
  type: :runtime
85
- version_requirements: *id004
126
+ version_requirements: *id007
127
+ - !ruby/object:Gem::Dependency
128
+ name: spreadsheet
129
+ prerelease: false
130
+ requirement: &id008 !ruby/object:Gem::Requirement
131
+ none: false
132
+ requirements:
133
+ - - ">="
134
+ - !ruby/object:Gem::Version
135
+ hash: 3
136
+ segments:
137
+ - 0
138
+ version: "0"
139
+ type: :runtime
140
+ version_requirements: *id008
141
+ - !ruby/object:Gem::Dependency
142
+ name: google-spreadsheet-ruby
143
+ prerelease: false
144
+ requirement: &id009 !ruby/object:Gem::Requirement
145
+ none: false
146
+ requirements:
147
+ - - ">="
148
+ - !ruby/object:Gem::Version
149
+ hash: 3
150
+ segments:
151
+ - 0
152
+ version: "0"
153
+ type: :runtime
154
+ version_requirements: *id009
86
155
  - !ruby/object:Gem::Dependency
87
156
  name: escape
88
157
  prerelease: false
89
- requirement: &id005 !ruby/object:Gem::Requirement
158
+ requirement: &id010 !ruby/object:Gem::Requirement
90
159
  none: false
91
160
  requirements:
92
161
  - - ">="
@@ -98,11 +167,11 @@ dependencies:
98
167
  - 4
99
168
  version: 0.0.4
100
169
  type: :runtime
101
- version_requirements: *id005
170
+ version_requirements: *id010
102
171
  - !ruby/object:Gem::Dependency
103
172
  name: errata
104
173
  prerelease: false
105
- requirement: &id006 !ruby/object:Gem::Requirement
174
+ requirement: &id011 !ruby/object:Gem::Requirement
106
175
  none: false
107
176
  requirements:
108
177
  - - ">="
@@ -114,48 +183,98 @@ dependencies:
114
183
  - 0
115
184
  version: 0.2.0
116
185
  type: :development
117
- version_requirements: *id006
118
- description: Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.
119
- email: seamus@abshere.net
186
+ version_requirements: *id011
187
+ - !ruby/object:Gem::Dependency
188
+ name: test-unit
189
+ prerelease: false
190
+ requirement: &id012 !ruby/object:Gem::Requirement
191
+ none: false
192
+ requirements:
193
+ - - ">="
194
+ - !ruby/object:Gem::Version
195
+ hash: 3
196
+ segments:
197
+ - 0
198
+ version: "0"
199
+ type: :development
200
+ version_requirements: *id012
201
+ - !ruby/object:Gem::Dependency
202
+ name: shoulda
203
+ prerelease: false
204
+ requirement: &id013 !ruby/object:Gem::Requirement
205
+ none: false
206
+ requirements:
207
+ - - ">="
208
+ - !ruby/object:Gem::Version
209
+ hash: 3
210
+ segments:
211
+ - 0
212
+ version: "0"
213
+ type: :development
214
+ version_requirements: *id013
215
+ - !ruby/object:Gem::Dependency
216
+ name: ruby-debug
217
+ prerelease: false
218
+ requirement: &id014 !ruby/object:Gem::Requirement
219
+ none: false
220
+ requirements:
221
+ - - ">="
222
+ - !ruby/object:Gem::Version
223
+ hash: 3
224
+ segments:
225
+ - 0
226
+ version: "0"
227
+ type: :development
228
+ version_requirements: *id014
229
+ description: Gives you a standard way to parse various formats and treat them as an array of hashes.
230
+ email:
231
+ - seamus@abshere.net
120
232
  executables: []
121
233
 
122
234
  extensions: []
123
235
 
124
- extra_rdoc_files:
125
- - LICENSE
126
- - README.rdoc
236
+ extra_rdoc_files: []
237
+
127
238
  files:
128
239
  - .document
129
240
  - .gitignore
130
241
  - CHANGELOG
242
+ - Gemfile
243
+ - Gemfile.lock
131
244
  - LICENSE
132
245
  - README.rdoc
133
246
  - Rakefile
134
- - VERSION
135
247
  - lib/remote_table.rb
136
- - lib/remote_table/file.rb
137
- - lib/remote_table/file/csv.rb
138
- - lib/remote_table/file/fixed_width.rb
139
- - lib/remote_table/file/html.rb
140
- - lib/remote_table/file/ods.rb
141
- - lib/remote_table/file/roo_spreadsheet.rb
142
- - lib/remote_table/file/xls.rb
143
- - lib/remote_table/file/xlsx.rb
144
- - lib/remote_table/package.rb
145
- - lib/remote_table/request.rb
146
- - lib/remote_table/transform.rb
248
+ - lib/remote_table/cleaner.rb
249
+ - lib/remote_table/executor.rb
250
+ - lib/remote_table/format.rb
251
+ - lib/remote_table/format/delimited.rb
252
+ - lib/remote_table/format/excel.rb
253
+ - lib/remote_table/format/excelx.rb
254
+ - lib/remote_table/format/fixed_width.rb
255
+ - lib/remote_table/format/html.rb
256
+ - lib/remote_table/format/mixins/rooable.rb
257
+ - lib/remote_table/format/mixins/textual.rb
258
+ - lib/remote_table/format/open_office.rb
259
+ - lib/remote_table/hasher.rb
260
+ - lib/remote_table/local_file.rb
261
+ - lib/remote_table/properties.rb
262
+ - lib/remote_table/transformer.rb
263
+ - lib/remote_table/version.rb
147
264
  - remote_table.gemspec
148
- - test/remote_table_test.rb
149
- - test/test_helper.rb
265
+ - test/helper.rb
266
+ - test/test_big.rb
267
+ - test/test_errata.rb
268
+ - test/test_old_syntax.rb
269
+ - test/test_old_transform.rb
270
+ - test/test_remote_table.rb
150
271
  has_rdoc: true
151
- homepage: http://github.com/seamusabshere/remote_table
272
+ homepage: https://github.com/seamusabshere/remote_table
152
273
  licenses: []
153
274
 
154
275
  post_install_message:
155
- rdoc_options:
156
- - --charset=UTF-8
157
- - --line-numbers
158
- - --inline-source
276
+ rdoc_options: []
277
+
159
278
  require_paths:
160
279
  - lib
161
280
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -176,13 +295,17 @@ required_rubygems_version: !ruby/object:Gem::Requirement
176
295
  segments:
177
296
  - 0
178
297
  version: "0"
179
- requirements:
180
- - curl
298
+ requirements: []
299
+
181
300
  rubyforge_project: remotetable
182
301
  rubygems_version: 1.3.7
183
302
  signing_key:
184
303
  specification_version: 3
185
- summary: Remotely open and parse XLS, ODS, CSV and fixed-width tables.
304
+ summary: Open local or remote XLSX, XLS, ODS, CSV and fixed-width files.
186
305
  test_files:
187
- - test/remote_table_test.rb
188
- - test/test_helper.rb
306
+ - test/helper.rb
307
+ - test/test_big.rb
308
+ - test/test_errata.rb
309
+ - test/test_old_syntax.rb
310
+ - test/test_old_transform.rb
311
+ - test/test_remote_table.rb
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.2.32
data/lib/remote_table/file/csv.rb DELETED
@@ -1,49 +0,0 @@
1
- class RemoteTable
2
- module Csv
3
- def each_row(&block)
4
- backup_file!
5
- convert_file_to_utf8!
6
- remove_useless_characters!
7
- skip_rows!
8
- FasterCSV.foreach(path, fastercsv_options) do |row|
9
- ordered_hash = ActiveSupport::OrderedHash.new
10
- filled_values = 0
11
- case row
12
- when FasterCSV::Row
13
- row.each do |header, value|
14
- next if header.blank?
15
- value = '' if value.nil?
16
- ordered_hash[header] = value
17
- filled_values += 1 if value.present?
18
- end
19
- when Array
20
- index = 0
21
- row.each do |value|
22
- value = '' if value.nil?
23
- ordered_hash[index] = value
24
- filled_values += 1 if value.present?
25
- index += 1
26
- end
27
- else
28
- raise "Unexpected #{row.inspect}"
29
- end
30
- yield ordered_hash if keep_blank_rows or filled_values.nonzero?
31
- end
32
- ensure
33
- restore_file!
34
- end
35
-
36
- private
37
-
38
- def fastercsv_options
39
- fastercsv_options = { :skip_blanks => !keep_blank_rows }
40
- if headers == false
41
- fastercsv_options.merge!(:headers => nil)
42
- else
43
- fastercsv_options.merge!(:headers => :first_row)
44
- end
45
- fastercsv_options.merge!(:col_sep => delimiter) if delimiter
46
- fastercsv_options
47
- end
48
- end
49
- end
data/lib/remote_table/file/fixed_width.rb DELETED
@@ -1,19 +0,0 @@
1
- class RemoteTable
2
- module FixedWidth
3
- def each_row(&block)
4
- backup_file!
5
- convert_file_to_utf8!
6
- remove_useless_characters!
7
- crop_rows!
8
- skip_rows!
9
- cut_columns!
10
- a = Slither.parse(path, schema_name)
11
- a[:rows].each do |hash|
12
- hash.reject! { |k, v| k.blank? }
13
- yield hash if keep_blank_rows or hash.any? { |k, v| v.present? }
14
- end
15
- ensure
16
- restore_file!
17
- end
18
- end
19
- end
data/lib/remote_table/file/html.rb DELETED
@@ -1,37 +0,0 @@
1
- class RemoteTable
2
- module Html
3
- def each_row(&block)
4
- backup_file!
5
- convert_file_to_utf8!
6
- remove_useless_characters!
7
- html_headers = (headers.is_a?(Array)) ? headers : nil
8
- Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8').xpath(row_xpath).each do |row|
9
- values = row.xpath(column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
10
- if html_headers.nil?
11
- html_headers = values
12
- next
13
- end
14
- hash = zip html_headers, values
15
- yield hash if keep_blank_rows or hash.any? { |k, v| v.present? }
16
- end
17
- ensure
18
- restore_file!
19
- end
20
-
21
- private
22
-
23
- # http://snippets.dzone.com/posts/show/406
24
- def zip(keys, values)
25
- hash = Hash.new
26
- keys.zip(values) { |k,v| hash[k]=v }
27
- hash
28
- end
29
-
30
- # should we be doing this in ruby?
31
- def unescaped_html_without_soft_hyphens
32
- str = CGI.unescapeHTML IO.read(path)
33
- str.gsub! /­/, ''
34
- str
35
- end
36
- end
37
- end
data/lib/remote_table/file/ods.rb DELETED
@@ -1,11 +0,0 @@
1
- class RemoteTable
2
- module Ods
3
- def self.extended(base)
4
- base.send :extend, RooSpreadsheet
5
- end
6
-
7
- def roo_klass
8
- Openoffice
9
- end
10
- end
11
- end
data/lib/remote_table/file/roo_spreadsheet.rb DELETED
@@ -1,44 +0,0 @@
1
- class RemoteTable
2
- module RooSpreadsheet
3
- def each_row(&block)
4
- oo = roo_klass.new(path, nil, :ignore)
5
- oo.default_sheet = sheet.is_a?(Numeric) ? oo.sheets[sheet] : sheet
6
- column_references = Hash.new
7
- if headers == false
8
- # zero-based numeric keys
9
- for col in (1..oo.last_column)
10
- column_references[col] = col - 1
11
- end
12
- elsif headers.is_a? Array
13
- # names
14
- for col in (1..oo.last_column)
15
- column_references[col] = headers[col - 1]
16
- end
17
- else
18
- # read headers from the file itself
19
- for col in (1..oo.last_column)
20
- column_references[col] = oo.cell(header_row, col)
21
- column_references[col] = oo.cell(header_row - 1, col) if column_references[col].blank? # look up
22
- end
23
- end
24
- first_data_row.upto(oo.last_row) do |raw_row|
25
- ordered_hash = ActiveSupport::OrderedHash.new
26
- for col in (1..oo.last_column)
27
- next if column_references[col].blank?
28
- ordered_hash[column_references[col]] = oo.cell(raw_row, col).to_s.gsub(/<[^>]+>/, '').strip
29
- end
30
- yield ordered_hash if keep_blank_rows or ordered_hash.any? { |k, v| v.present? }
31
- end
32
- end
33
-
34
- private
35
-
36
- def header_row
37
- 1 + skip.to_i
38
- end
39
-
40
- def first_data_row
41
- 1 + header_row
42
- end
43
- end
44
- end
data/lib/remote_table/file/xls.rb DELETED
@@ -1,11 +0,0 @@
1
- class RemoteTable
2
- module Xls
3
- def self.extended(base)
4
- base.send :extend, RooSpreadsheet
5
- end
6
-
7
- def roo_klass
8
- Excel
9
- end
10
- end
11
- end
data/lib/remote_table/file/xlsx.rb DELETED
@@ -1,11 +0,0 @@
1
- class RemoteTable
2
- module Xlsx
3
- def self.extended(base)
4
- base.send :extend, RooSpreadsheet
5
- end
6
-
7
- def roo_klass
8
- Excelx
9
- end
10
- end
11
- end
data/lib/remote_table/file.rb DELETED
@@ -1,100 +0,0 @@
1
- class RemoteTable
2
- class File
3
- attr_accessor :filename, :format, :delimiter, :skip, :cut, :crop, :sheet, :headers, :schema, :schema_name, :trap
4
- attr_accessor :encoding
5
- attr_accessor :path
6
- attr_accessor :keep_blank_rows
7
- attr_accessor :row_xpath
8
- attr_accessor :column_xpath
9
-
10
- def initialize(bus)
11
- @filename = bus[:filename]
12
- @format = bus[:format] || format_from_filename
13
- @delimiter = bus[:delimiter]
14
- @sheet = bus[:sheet] || 0
15
- @skip = bus[:skip] # rows
16
- @keep_blank_rows = bus[:keep_blank_rows] || false
17
- @crop = bus[:crop] # rows
18
- @cut = bus[:cut] # columns
19
- @headers = bus[:headers]
20
- @schema = bus[:schema]
21
- @schema_name = bus[:schema_name]
22
- @trap = bus[:trap]
23
- @encoding = bus[:encoding] || 'UTF-8'
24
- @row_xpath = bus[:row_xpath]
25
- @column_xpath = bus[:column_xpath]
26
- extend "RemoteTable::#{format.to_s.camelcase}".constantize
27
- end
28
-
29
- def tabulate(path)
30
- define_fixed_width_schema! if format == :fixed_width and schema.is_a?(Array) # TODO move to generic subclass callback
31
- self.path = path
32
- self
33
- end
34
-
35
- private
36
-
37
- # doesn't support trap
38
- def define_fixed_width_schema!
39
- raise "can't define both schema_name and schema" if !schema_name.blank?
40
- self.schema_name = "autogenerated_#{filename.gsub(/[^a-z0-9_]/i, '')}".to_sym
41
- self.trap ||= lambda { |_| true }
42
- Slither.define schema_name do |d|
43
- d.rows do |row|
44
- row.trap(&trap)
45
- schema.each do |name, width, options|
46
- if name == 'spacer'
47
- row.spacer width
48
- else
49
- row.column name, width, options
50
- end
51
- end
52
- end
53
- end
54
- end
55
-
56
- def backup_file!
57
- FileUtils.cp path, "#{path}.backup"
58
- end
59
-
60
- def skip_rows!
61
- return unless skip
62
- RemoteTable.bang path, "tail -n +#{skip + 1}"
63
- end
64
-
65
- USELESS_CHARACTERS = [
66
- '\xef\xbb\xbf', # UTF-8 byte order mark
67
- '\xc2\xad' # soft hyphen, often inserted by MS Office (html: &shy;)
68
- ]
69
- def remove_useless_characters!
70
- RemoteTable.bang path, "perl -pe 's/#{USELESS_CHARACTERS.join '//g; s/'}//g'"
71
- end
72
-
73
- def convert_file_to_utf8!
74
- RemoteTable.bang path, "iconv -c -f #{Escape.shell_single_word encoding} -t UTF-8"
75
- end
76
-
77
- def restore_file!
78
- FileUtils.mv "#{path}.backup", path if ::File.readable? "#{path}.backup"
79
- end
80
-
81
- def cut_columns!
82
- return unless cut
83
- RemoteTable.bang path, "cut -c #{Escape.shell_single_word cut.to_s}"
84
- end
85
-
86
- def crop_rows!
87
- return unless crop
88
- RemoteTable.bang path, "tail -n +#{Escape.shell_single_word crop.first.to_s} | head -n #{crop.last - crop.first + 1}"
89
- end
90
-
91
- def format_from_filename
92
- extname = ::File.extname(filename).gsub('.', '')
93
- return :csv if extname.blank?
94
- format = [ :xls, :ods, :xlsx ].detect { |i| i == extname.to_sym }
95
- format = :html if extname =~ /\Ahtm/
96
- format = :csv if format.blank?
97
- format
98
- end
99
- end
100
- end