remote_table 0.2.32 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. data/CHANGELOG +5 -0
  2. data/Gemfile +4 -0
  3. data/Gemfile.lock +65 -0
  4. data/LICENSE +1 -1
  5. data/README.rdoc +21 -7
  6. data/Rakefile +12 -61
  7. data/lib/remote_table/cleaner.rb +19 -0
  8. data/lib/remote_table/executor.rb +29 -0
  9. data/lib/remote_table/format/delimited.rb +62 -0
  10. data/lib/remote_table/format/excel.rb +10 -0
  11. data/lib/remote_table/format/excelx.rb +10 -0
  12. data/lib/remote_table/format/fixed_width.rb +47 -0
  13. data/lib/remote_table/format/html.rb +43 -0
  14. data/lib/remote_table/format/mixins/rooable.rb +47 -0
  15. data/lib/remote_table/format/mixins/textual.rb +34 -0
  16. data/lib/remote_table/format/open_office.rb +10 -0
  17. data/lib/remote_table/format.rb +35 -0
  18. data/lib/remote_table/hasher.rb +25 -0
  19. data/lib/remote_table/local_file.rb +92 -0
  20. data/lib/remote_table/properties.rb +209 -0
  21. data/lib/remote_table/transformer.rb +17 -0
  22. data/lib/remote_table/version.rb +3 -0
  23. data/lib/remote_table.rb +91 -99
  24. data/remote_table.gemspec +32 -77
  25. data/test/{test_helper.rb → helper.rb} +9 -2
  26. data/test/test_big.rb +61 -0
  27. data/test/test_errata.rb +46 -0
  28. data/test/test_old_syntax.rb +229 -0
  29. data/test/test_old_transform.rb +49 -0
  30. data/test/test_remote_table.rb +13 -0
  31. metadata +176 -53
  32. data/VERSION +0 -1
  33. data/lib/remote_table/file/csv.rb +0 -49
  34. data/lib/remote_table/file/fixed_width.rb +0 -19
  35. data/lib/remote_table/file/html.rb +0 -37
  36. data/lib/remote_table/file/ods.rb +0 -11
  37. data/lib/remote_table/file/roo_spreadsheet.rb +0 -44
  38. data/lib/remote_table/file/xls.rb +0 -11
  39. data/lib/remote_table/file/xlsx.rb +0 -11
  40. data/lib/remote_table/file.rb +0 -100
  41. data/lib/remote_table/package.rb +0 -89
  42. data/lib/remote_table/request.rb +0 -44
  43. data/lib/remote_table/transform.rb +0 -58
  44. data/test/remote_table_test.rb +0 -386
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: remote_table
3
3
  version: !ruby/object:Gem::Version
4
- hash: 87
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
+ - 1
7
8
  - 0
8
- - 2
9
- - 32
10
- version: 0.2.32
9
+ - 0
10
+ version: 1.0.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Seamus Abshere
@@ -16,39 +16,38 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2010-10-07 00:00:00 -05:00
19
+ date: 2011-01-24 00:00:00 -06:00
20
20
  default_executable:
21
21
  dependencies:
22
22
  - !ruby/object:Gem::Dependency
23
- name: roo
23
+ name: activesupport
24
24
  prerelease: false
25
25
  requirement: &id001 !ruby/object:Gem::Requirement
26
26
  none: false
27
27
  requirements:
28
- - - "="
28
+ - - ">="
29
29
  - !ruby/object:Gem::Version
30
- hash: 13
30
+ hash: 11
31
31
  segments:
32
- - 1
32
+ - 2
33
33
  - 3
34
- - 11
35
- version: 1.3.11
34
+ - 4
35
+ version: 2.3.4
36
36
  type: :runtime
37
37
  version_requirements: *id001
38
38
  - !ruby/object:Gem::Dependency
39
- name: activesupport
39
+ name: roo
40
40
  prerelease: false
41
41
  requirement: &id002 !ruby/object:Gem::Requirement
42
42
  none: false
43
43
  requirements:
44
- - - ">="
44
+ - - ~>
45
45
  - !ruby/object:Gem::Version
46
- hash: 11
46
+ hash: 29
47
47
  segments:
48
- - 2
49
- - 3
50
- - 4
51
- version: 2.3.4
48
+ - 1
49
+ - 9
50
+ version: "1.9"
52
51
  type: :runtime
53
52
  version_requirements: *id002
54
53
  - !ruby/object:Gem::Dependency
@@ -68,9 +67,51 @@ dependencies:
68
67
  type: :runtime
69
68
  version_requirements: *id003
70
69
  - !ruby/object:Gem::Dependency
71
- name: nokogiri
70
+ name: i18n
72
71
  prerelease: false
73
72
  requirement: &id004 !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ hash: 3
78
+ segments:
79
+ - 0
80
+ version: "0"
81
+ type: :runtime
82
+ version_requirements: *id004
83
+ - !ruby/object:Gem::Dependency
84
+ name: builder
85
+ prerelease: false
86
+ requirement: &id005 !ruby/object:Gem::Requirement
87
+ none: false
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ hash: 3
92
+ segments:
93
+ - 0
94
+ version: "0"
95
+ type: :runtime
96
+ version_requirements: *id005
97
+ - !ruby/object:Gem::Dependency
98
+ name: zip
99
+ prerelease: false
100
+ requirement: &id006 !ruby/object:Gem::Requirement
101
+ none: false
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ hash: 3
106
+ segments:
107
+ - 0
108
+ version: "0"
109
+ type: :runtime
110
+ version_requirements: *id006
111
+ - !ruby/object:Gem::Dependency
112
+ name: nokogiri
113
+ prerelease: false
114
+ requirement: &id007 !ruby/object:Gem::Requirement
74
115
  none: false
75
116
  requirements:
76
117
  - - ">="
@@ -82,11 +123,39 @@ dependencies:
82
123
  - 1
83
124
  version: 1.4.1
84
125
  type: :runtime
85
- version_requirements: *id004
126
+ version_requirements: *id007
127
+ - !ruby/object:Gem::Dependency
128
+ name: spreadsheet
129
+ prerelease: false
130
+ requirement: &id008 !ruby/object:Gem::Requirement
131
+ none: false
132
+ requirements:
133
+ - - ">="
134
+ - !ruby/object:Gem::Version
135
+ hash: 3
136
+ segments:
137
+ - 0
138
+ version: "0"
139
+ type: :runtime
140
+ version_requirements: *id008
141
+ - !ruby/object:Gem::Dependency
142
+ name: google-spreadsheet-ruby
143
+ prerelease: false
144
+ requirement: &id009 !ruby/object:Gem::Requirement
145
+ none: false
146
+ requirements:
147
+ - - ">="
148
+ - !ruby/object:Gem::Version
149
+ hash: 3
150
+ segments:
151
+ - 0
152
+ version: "0"
153
+ type: :runtime
154
+ version_requirements: *id009
86
155
  - !ruby/object:Gem::Dependency
87
156
  name: escape
88
157
  prerelease: false
89
- requirement: &id005 !ruby/object:Gem::Requirement
158
+ requirement: &id010 !ruby/object:Gem::Requirement
90
159
  none: false
91
160
  requirements:
92
161
  - - ">="
@@ -98,11 +167,11 @@ dependencies:
98
167
  - 4
99
168
  version: 0.0.4
100
169
  type: :runtime
101
- version_requirements: *id005
170
+ version_requirements: *id010
102
171
  - !ruby/object:Gem::Dependency
103
172
  name: errata
104
173
  prerelease: false
105
- requirement: &id006 !ruby/object:Gem::Requirement
174
+ requirement: &id011 !ruby/object:Gem::Requirement
106
175
  none: false
107
176
  requirements:
108
177
  - - ">="
@@ -114,48 +183,98 @@ dependencies:
114
183
  - 0
115
184
  version: 0.2.0
116
185
  type: :development
117
- version_requirements: *id006
118
- description: Remotely open and parse Excel XLS, ODS, CSV and fixed-width tables.
119
- email: seamus@abshere.net
186
+ version_requirements: *id011
187
+ - !ruby/object:Gem::Dependency
188
+ name: test-unit
189
+ prerelease: false
190
+ requirement: &id012 !ruby/object:Gem::Requirement
191
+ none: false
192
+ requirements:
193
+ - - ">="
194
+ - !ruby/object:Gem::Version
195
+ hash: 3
196
+ segments:
197
+ - 0
198
+ version: "0"
199
+ type: :development
200
+ version_requirements: *id012
201
+ - !ruby/object:Gem::Dependency
202
+ name: shoulda
203
+ prerelease: false
204
+ requirement: &id013 !ruby/object:Gem::Requirement
205
+ none: false
206
+ requirements:
207
+ - - ">="
208
+ - !ruby/object:Gem::Version
209
+ hash: 3
210
+ segments:
211
+ - 0
212
+ version: "0"
213
+ type: :development
214
+ version_requirements: *id013
215
+ - !ruby/object:Gem::Dependency
216
+ name: ruby-debug
217
+ prerelease: false
218
+ requirement: &id014 !ruby/object:Gem::Requirement
219
+ none: false
220
+ requirements:
221
+ - - ">="
222
+ - !ruby/object:Gem::Version
223
+ hash: 3
224
+ segments:
225
+ - 0
226
+ version: "0"
227
+ type: :development
228
+ version_requirements: *id014
229
+ description: Gives you a standard way to parse various formats and treat them as an array of hashes.
230
+ email:
231
+ - seamus@abshere.net
120
232
  executables: []
121
233
 
122
234
  extensions: []
123
235
 
124
- extra_rdoc_files:
125
- - LICENSE
126
- - README.rdoc
236
+ extra_rdoc_files: []
237
+
127
238
  files:
128
239
  - .document
129
240
  - .gitignore
130
241
  - CHANGELOG
242
+ - Gemfile
243
+ - Gemfile.lock
131
244
  - LICENSE
132
245
  - README.rdoc
133
246
  - Rakefile
134
- - VERSION
135
247
  - lib/remote_table.rb
136
- - lib/remote_table/file.rb
137
- - lib/remote_table/file/csv.rb
138
- - lib/remote_table/file/fixed_width.rb
139
- - lib/remote_table/file/html.rb
140
- - lib/remote_table/file/ods.rb
141
- - lib/remote_table/file/roo_spreadsheet.rb
142
- - lib/remote_table/file/xls.rb
143
- - lib/remote_table/file/xlsx.rb
144
- - lib/remote_table/package.rb
145
- - lib/remote_table/request.rb
146
- - lib/remote_table/transform.rb
248
+ - lib/remote_table/cleaner.rb
249
+ - lib/remote_table/executor.rb
250
+ - lib/remote_table/format.rb
251
+ - lib/remote_table/format/delimited.rb
252
+ - lib/remote_table/format/excel.rb
253
+ - lib/remote_table/format/excelx.rb
254
+ - lib/remote_table/format/fixed_width.rb
255
+ - lib/remote_table/format/html.rb
256
+ - lib/remote_table/format/mixins/rooable.rb
257
+ - lib/remote_table/format/mixins/textual.rb
258
+ - lib/remote_table/format/open_office.rb
259
+ - lib/remote_table/hasher.rb
260
+ - lib/remote_table/local_file.rb
261
+ - lib/remote_table/properties.rb
262
+ - lib/remote_table/transformer.rb
263
+ - lib/remote_table/version.rb
147
264
  - remote_table.gemspec
148
- - test/remote_table_test.rb
149
- - test/test_helper.rb
265
+ - test/helper.rb
266
+ - test/test_big.rb
267
+ - test/test_errata.rb
268
+ - test/test_old_syntax.rb
269
+ - test/test_old_transform.rb
270
+ - test/test_remote_table.rb
150
271
  has_rdoc: true
151
- homepage: http://github.com/seamusabshere/remote_table
272
+ homepage: https://github.com/seamusabshere/remote_table
152
273
  licenses: []
153
274
 
154
275
  post_install_message:
155
- rdoc_options:
156
- - --charset=UTF-8
157
- - --line-numbers
158
- - --inline-source
276
+ rdoc_options: []
277
+
159
278
  require_paths:
160
279
  - lib
161
280
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -176,13 +295,17 @@ required_rubygems_version: !ruby/object:Gem::Requirement
176
295
  segments:
177
296
  - 0
178
297
  version: "0"
179
- requirements:
180
- - curl
298
+ requirements: []
299
+
181
300
  rubyforge_project: remotetable
182
301
  rubygems_version: 1.3.7
183
302
  signing_key:
184
303
  specification_version: 3
185
- summary: Remotely open and parse XLS, ODS, CSV and fixed-width tables.
304
+ summary: Open local or remote XLSX, XLS, ODS, CSV and fixed-width files.
186
305
  test_files:
187
- - test/remote_table_test.rb
188
- - test/test_helper.rb
306
+ - test/helper.rb
307
+ - test/test_big.rb
308
+ - test/test_errata.rb
309
+ - test/test_old_syntax.rb
310
+ - test/test_old_transform.rb
311
+ - test/test_remote_table.rb
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.2.32
@@ -1,49 +0,0 @@
1
- class RemoteTable
2
- module Csv
3
- def each_row(&block)
4
- backup_file!
5
- convert_file_to_utf8!
6
- remove_useless_characters!
7
- skip_rows!
8
- FasterCSV.foreach(path, fastercsv_options) do |row|
9
- ordered_hash = ActiveSupport::OrderedHash.new
10
- filled_values = 0
11
- case row
12
- when FasterCSV::Row
13
- row.each do |header, value|
14
- next if header.blank?
15
- value = '' if value.nil?
16
- ordered_hash[header] = value
17
- filled_values += 1 if value.present?
18
- end
19
- when Array
20
- index = 0
21
- row.each do |value|
22
- value = '' if value.nil?
23
- ordered_hash[index] = value
24
- filled_values += 1 if value.present?
25
- index += 1
26
- end
27
- else
28
- raise "Unexpected #{row.inspect}"
29
- end
30
- yield ordered_hash if keep_blank_rows or filled_values.nonzero?
31
- end
32
- ensure
33
- restore_file!
34
- end
35
-
36
- private
37
-
38
- def fastercsv_options
39
- fastercsv_options = { :skip_blanks => !keep_blank_rows }
40
- if headers == false
41
- fastercsv_options.merge!(:headers => nil)
42
- else
43
- fastercsv_options.merge!(:headers => :first_row)
44
- end
45
- fastercsv_options.merge!(:col_sep => delimiter) if delimiter
46
- fastercsv_options
47
- end
48
- end
49
- end
@@ -1,19 +0,0 @@
1
- class RemoteTable
2
- module FixedWidth
3
- def each_row(&block)
4
- backup_file!
5
- convert_file_to_utf8!
6
- remove_useless_characters!
7
- crop_rows!
8
- skip_rows!
9
- cut_columns!
10
- a = Slither.parse(path, schema_name)
11
- a[:rows].each do |hash|
12
- hash.reject! { |k, v| k.blank? }
13
- yield hash if keep_blank_rows or hash.any? { |k, v| v.present? }
14
- end
15
- ensure
16
- restore_file!
17
- end
18
- end
19
- end
@@ -1,37 +0,0 @@
1
- class RemoteTable
2
- module Html
3
- def each_row(&block)
4
- backup_file!
5
- convert_file_to_utf8!
6
- remove_useless_characters!
7
- html_headers = (headers.is_a?(Array)) ? headers : nil
8
- Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8').xpath(row_xpath).each do |row|
9
- values = row.xpath(column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
10
- if html_headers.nil?
11
- html_headers = values
12
- next
13
- end
14
- hash = zip html_headers, values
15
- yield hash if keep_blank_rows or hash.any? { |k, v| v.present? }
16
- end
17
- ensure
18
- restore_file!
19
- end
20
-
21
- private
22
-
23
- # http://snippets.dzone.com/posts/show/406
24
- def zip(keys, values)
25
- hash = Hash.new
26
- keys.zip(values) { |k,v| hash[k]=v }
27
- hash
28
- end
29
-
30
- # should we be doing this in ruby?
31
- def unescaped_html_without_soft_hyphens
32
- str = CGI.unescapeHTML IO.read(path)
33
- str.gsub! /­/, ''
34
- str
35
- end
36
- end
37
- end
@@ -1,11 +0,0 @@
1
- class RemoteTable
2
- module Ods
3
- def self.extended(base)
4
- base.send :extend, RooSpreadsheet
5
- end
6
-
7
- def roo_klass
8
- Openoffice
9
- end
10
- end
11
- end
@@ -1,44 +0,0 @@
1
- class RemoteTable
2
- module RooSpreadsheet
3
- def each_row(&block)
4
- oo = roo_klass.new(path, nil, :ignore)
5
- oo.default_sheet = sheet.is_a?(Numeric) ? oo.sheets[sheet] : sheet
6
- column_references = Hash.new
7
- if headers == false
8
- # zero-based numeric keys
9
- for col in (1..oo.last_column)
10
- column_references[col] = col - 1
11
- end
12
- elsif headers.is_a? Array
13
- # names
14
- for col in (1..oo.last_column)
15
- column_references[col] = headers[col - 1]
16
- end
17
- else
18
- # read headers from the file itself
19
- for col in (1..oo.last_column)
20
- column_references[col] = oo.cell(header_row, col)
21
- column_references[col] = oo.cell(header_row - 1, col) if column_references[col].blank? # look up
22
- end
23
- end
24
- first_data_row.upto(oo.last_row) do |raw_row|
25
- ordered_hash = ActiveSupport::OrderedHash.new
26
- for col in (1..oo.last_column)
27
- next if column_references[col].blank?
28
- ordered_hash[column_references[col]] = oo.cell(raw_row, col).to_s.gsub(/<[^>]+>/, '').strip
29
- end
30
- yield ordered_hash if keep_blank_rows or ordered_hash.any? { |k, v| v.present? }
31
- end
32
- end
33
-
34
- private
35
-
36
- def header_row
37
- 1 + skip.to_i
38
- end
39
-
40
- def first_data_row
41
- 1 + header_row
42
- end
43
- end
44
- end
@@ -1,11 +0,0 @@
1
- class RemoteTable
2
- module Xls
3
- def self.extended(base)
4
- base.send :extend, RooSpreadsheet
5
- end
6
-
7
- def roo_klass
8
- Excel
9
- end
10
- end
11
- end
@@ -1,11 +0,0 @@
1
- class RemoteTable
2
- module Xlsx
3
- def self.extended(base)
4
- base.send :extend, RooSpreadsheet
5
- end
6
-
7
- def roo_klass
8
- Excelx
9
- end
10
- end
11
- end
@@ -1,100 +0,0 @@
1
- class RemoteTable
2
- class File
3
- attr_accessor :filename, :format, :delimiter, :skip, :cut, :crop, :sheet, :headers, :schema, :schema_name, :trap
4
- attr_accessor :encoding
5
- attr_accessor :path
6
- attr_accessor :keep_blank_rows
7
- attr_accessor :row_xpath
8
- attr_accessor :column_xpath
9
-
10
- def initialize(bus)
11
- @filename = bus[:filename]
12
- @format = bus[:format] || format_from_filename
13
- @delimiter = bus[:delimiter]
14
- @sheet = bus[:sheet] || 0
15
- @skip = bus[:skip] # rows
16
- @keep_blank_rows = bus[:keep_blank_rows] || false
17
- @crop = bus[:crop] # rows
18
- @cut = bus[:cut] # columns
19
- @headers = bus[:headers]
20
- @schema = bus[:schema]
21
- @schema_name = bus[:schema_name]
22
- @trap = bus[:trap]
23
- @encoding = bus[:encoding] || 'UTF-8'
24
- @row_xpath = bus[:row_xpath]
25
- @column_xpath = bus[:column_xpath]
26
- extend "RemoteTable::#{format.to_s.camelcase}".constantize
27
- end
28
-
29
- def tabulate(path)
30
- define_fixed_width_schema! if format == :fixed_width and schema.is_a?(Array) # TODO move to generic subclass callback
31
- self.path = path
32
- self
33
- end
34
-
35
- private
36
-
37
- # doesn't support trap
38
- def define_fixed_width_schema!
39
- raise "can't define both schema_name and schema" if !schema_name.blank?
40
- self.schema_name = "autogenerated_#{filename.gsub(/[^a-z0-9_]/i, '')}".to_sym
41
- self.trap ||= lambda { |_| true }
42
- Slither.define schema_name do |d|
43
- d.rows do |row|
44
- row.trap(&trap)
45
- schema.each do |name, width, options|
46
- if name == 'spacer'
47
- row.spacer width
48
- else
49
- row.column name, width, options
50
- end
51
- end
52
- end
53
- end
54
- end
55
-
56
- def backup_file!
57
- FileUtils.cp path, "#{path}.backup"
58
- end
59
-
60
- def skip_rows!
61
- return unless skip
62
- RemoteTable.bang path, "tail -n +#{skip + 1}"
63
- end
64
-
65
- USELESS_CHARACTERS = [
66
- '\xef\xbb\xbf', # UTF-8 byte order mark
67
- '\xc2\xad' # soft hyphen, often inserted by MS Office (html: &shy;)
68
- ]
69
- def remove_useless_characters!
70
- RemoteTable.bang path, "perl -pe 's/#{USELESS_CHARACTERS.join '//g; s/'}//g'"
71
- end
72
-
73
- def convert_file_to_utf8!
74
- RemoteTable.bang path, "iconv -c -f #{Escape.shell_single_word encoding} -t UTF-8"
75
- end
76
-
77
- def restore_file!
78
- FileUtils.mv "#{path}.backup", path if ::File.readable? "#{path}.backup"
79
- end
80
-
81
- def cut_columns!
82
- return unless cut
83
- RemoteTable.bang path, "cut -c #{Escape.shell_single_word cut.to_s}"
84
- end
85
-
86
- def crop_rows!
87
- return unless crop
88
- RemoteTable.bang path, "tail -n +#{Escape.shell_single_word crop.first.to_s} | head -n #{crop.last - crop.first + 1}"
89
- end
90
-
91
- def format_from_filename
92
- extname = ::File.extname(filename).gsub('.', '')
93
- return :csv if extname.blank?
94
- format = [ :xls, :ods, :xlsx ].detect { |i| i == extname.to_sym }
95
- format = :html if extname =~ /\Ahtm/
96
- format = :csv if format.blank?
97
- format
98
- end
99
- end
100
- end