remote_table 1.1.7 → 1.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/remote_table/format/html.rb +2 -2
- data/lib/remote_table/version.rb +1 -1
- data/test/test_remote_table.rb +6 -0
- metadata +9 -95
@@ -8,7 +8,7 @@ class RemoteTable
|
|
8
8
|
remove_useless_characters!
|
9
9
|
html_headers = (t.properties.headers.is_a?(::Array)) ? t.properties.headers : nil
|
10
10
|
::Nokogiri::HTML(unescaped_html_without_soft_hyphens, nil, 'UTF-8').xpath(t.properties.row_xpath).each do |row|
|
11
|
-
values = row.xpath(t.properties.column_xpath).map { |td|
|
11
|
+
values = row.xpath(t.properties.column_xpath).map { |td| td.content.gsub(/\s+/, ' ').strip }
|
12
12
|
if html_headers.nil?
|
13
13
|
html_headers = values
|
14
14
|
next
|
@@ -31,7 +31,7 @@ class RemoteTable
|
|
31
31
|
|
32
32
|
# should we be doing this in ruby?
|
33
33
|
def unescaped_html_without_soft_hyphens
|
34
|
-
str = ::CGI.unescapeHTML ::IO.read(t.local_file.path)
|
34
|
+
str = ::CGI.unescapeHTML utf8(::IO.read(t.local_file.path))
|
35
35
|
# get rid of MS Office baddies
|
36
36
|
str.gsub! /­/, ''
|
37
37
|
str
|
data/lib/remote_table/version.rb
CHANGED
data/test/test_remote_table.rb
CHANGED
@@ -59,4 +59,10 @@ class TestRemoteTable < Test::Unit::TestCase
|
|
59
59
|
t.free
|
60
60
|
assert_equal NilClass, t.instance_variable_get(:@to_a).class
|
61
61
|
end
|
62
|
+
|
63
|
+
# fixes ArgumentError: invalid byte sequence in UTF-8
|
64
|
+
should %{safely read non-utf8 html} do
|
65
|
+
t = RemoteTable.new :url => "http://www.faa.gov/air_traffic/publications/atpubs/CNT/5-2-B.htm", :encoding => 'windows-1252', :row_xpath => '//table/tr[2]/td/table/tr', :column_xpath => 'td'
|
66
|
+
assert_equal 'AGUSTA', t.rows[0]['Manufacturer']
|
67
|
+
end
|
62
68
|
end
|
metadata
CHANGED
@@ -1,13 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: remote_table
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash: 29
|
5
4
|
prerelease:
|
6
|
-
|
7
|
-
- 1
|
8
|
-
- 1
|
9
|
-
- 7
|
10
|
-
version: 1.1.7
|
5
|
+
version: 1.1.8
|
11
6
|
platform: ruby
|
12
7
|
authors:
|
13
8
|
- Seamus Abshere
|
@@ -16,7 +11,7 @@ autorequire:
|
|
16
11
|
bindir: bin
|
17
12
|
cert_chain: []
|
18
13
|
|
19
|
-
date: 2011-04-
|
14
|
+
date: 2011-04-22 00:00:00 -05:00
|
20
15
|
default_executable:
|
21
16
|
dependencies:
|
22
17
|
- !ruby/object:Gem::Dependency
|
@@ -27,11 +22,6 @@ dependencies:
|
|
27
22
|
requirements:
|
28
23
|
- - ">="
|
29
24
|
- !ruby/object:Gem::Version
|
30
|
-
hash: 11
|
31
|
-
segments:
|
32
|
-
- 2
|
33
|
-
- 3
|
34
|
-
- 4
|
35
25
|
version: 2.3.4
|
36
26
|
type: :runtime
|
37
27
|
version_requirements: *id001
|
@@ -43,10 +33,6 @@ dependencies:
|
|
43
33
|
requirements:
|
44
34
|
- - ~>
|
45
35
|
- !ruby/object:Gem::Version
|
46
|
-
hash: 29
|
47
|
-
segments:
|
48
|
-
- 1
|
49
|
-
- 9
|
50
36
|
version: "1.9"
|
51
37
|
type: :runtime
|
52
38
|
version_requirements: *id002
|
@@ -58,11 +44,6 @@ dependencies:
|
|
58
44
|
requirements:
|
59
45
|
- - ">="
|
60
46
|
- !ruby/object:Gem::Version
|
61
|
-
hash: 411
|
62
|
-
segments:
|
63
|
-
- 0
|
64
|
-
- 99
|
65
|
-
- 4
|
66
47
|
version: 0.99.4
|
67
48
|
type: :runtime
|
68
49
|
version_requirements: *id003
|
@@ -74,9 +55,6 @@ dependencies:
|
|
74
55
|
requirements:
|
75
56
|
- - ">="
|
76
57
|
- !ruby/object:Gem::Version
|
77
|
-
hash: 3
|
78
|
-
segments:
|
79
|
-
- 0
|
80
58
|
version: "0"
|
81
59
|
type: :runtime
|
82
60
|
version_requirements: *id004
|
@@ -88,9 +66,6 @@ dependencies:
|
|
88
66
|
requirements:
|
89
67
|
- - ">="
|
90
68
|
- !ruby/object:Gem::Version
|
91
|
-
hash: 3
|
92
|
-
segments:
|
93
|
-
- 0
|
94
69
|
version: "0"
|
95
70
|
type: :runtime
|
96
71
|
version_requirements: *id005
|
@@ -102,9 +77,6 @@ dependencies:
|
|
102
77
|
requirements:
|
103
78
|
- - ">="
|
104
79
|
- !ruby/object:Gem::Version
|
105
|
-
hash: 3
|
106
|
-
segments:
|
107
|
-
- 0
|
108
80
|
version: "0"
|
109
81
|
type: :runtime
|
110
82
|
version_requirements: *id006
|
@@ -116,11 +88,6 @@ dependencies:
|
|
116
88
|
requirements:
|
117
89
|
- - ">="
|
118
90
|
- !ruby/object:Gem::Version
|
119
|
-
hash: 5
|
120
|
-
segments:
|
121
|
-
- 1
|
122
|
-
- 4
|
123
|
-
- 1
|
124
91
|
version: 1.4.1
|
125
92
|
type: :runtime
|
126
93
|
version_requirements: *id007
|
@@ -132,9 +99,6 @@ dependencies:
|
|
132
99
|
requirements:
|
133
100
|
- - ">="
|
134
101
|
- !ruby/object:Gem::Version
|
135
|
-
hash: 3
|
136
|
-
segments:
|
137
|
-
- 0
|
138
102
|
version: "0"
|
139
103
|
type: :runtime
|
140
104
|
version_requirements: *id008
|
@@ -146,9 +110,6 @@ dependencies:
|
|
146
110
|
requirements:
|
147
111
|
- - ">="
|
148
112
|
- !ruby/object:Gem::Version
|
149
|
-
hash: 3
|
150
|
-
segments:
|
151
|
-
- 0
|
152
113
|
version: "0"
|
153
114
|
type: :runtime
|
154
115
|
version_requirements: *id009
|
@@ -160,11 +121,6 @@ dependencies:
|
|
160
121
|
requirements:
|
161
122
|
- - ">="
|
162
123
|
- !ruby/object:Gem::Version
|
163
|
-
hash: 23
|
164
|
-
segments:
|
165
|
-
- 0
|
166
|
-
- 0
|
167
|
-
- 4
|
168
124
|
version: 0.0.4
|
169
125
|
type: :runtime
|
170
126
|
version_requirements: *id010
|
@@ -176,9 +132,6 @@ dependencies:
|
|
176
132
|
requirements:
|
177
133
|
- - ">="
|
178
134
|
- !ruby/object:Gem::Version
|
179
|
-
hash: 3
|
180
|
-
segments:
|
181
|
-
- 0
|
182
135
|
version: "0"
|
183
136
|
type: :runtime
|
184
137
|
version_requirements: *id011
|
@@ -190,86 +143,53 @@ dependencies:
|
|
190
143
|
requirements:
|
191
144
|
- - ">="
|
192
145
|
- !ruby/object:Gem::Version
|
193
|
-
hash: 3
|
194
|
-
segments:
|
195
|
-
- 0
|
196
146
|
version: "0"
|
197
147
|
type: :runtime
|
198
148
|
version_requirements: *id012
|
199
149
|
- !ruby/object:Gem::Dependency
|
200
|
-
name:
|
150
|
+
name: errata
|
201
151
|
prerelease: false
|
202
152
|
requirement: &id013 !ruby/object:Gem::Requirement
|
203
153
|
none: false
|
204
154
|
requirements:
|
205
155
|
- - ">="
|
206
156
|
- !ruby/object:Gem::Version
|
207
|
-
|
208
|
-
|
209
|
-
- 1
|
210
|
-
- 5
|
211
|
-
- 0
|
212
|
-
version: 1.5.0
|
213
|
-
type: :runtime
|
157
|
+
version: 0.2.0
|
158
|
+
type: :development
|
214
159
|
version_requirements: *id013
|
215
160
|
- !ruby/object:Gem::Dependency
|
216
|
-
name:
|
161
|
+
name: test-unit
|
217
162
|
prerelease: false
|
218
163
|
requirement: &id014 !ruby/object:Gem::Requirement
|
219
164
|
none: false
|
220
165
|
requirements:
|
221
166
|
- - ">="
|
222
167
|
- !ruby/object:Gem::Version
|
223
|
-
|
224
|
-
segments:
|
225
|
-
- 0
|
226
|
-
- 2
|
227
|
-
- 0
|
228
|
-
version: 0.2.0
|
168
|
+
version: "0"
|
229
169
|
type: :development
|
230
170
|
version_requirements: *id014
|
231
171
|
- !ruby/object:Gem::Dependency
|
232
|
-
name:
|
172
|
+
name: shoulda
|
233
173
|
prerelease: false
|
234
174
|
requirement: &id015 !ruby/object:Gem::Requirement
|
235
175
|
none: false
|
236
176
|
requirements:
|
237
177
|
- - ">="
|
238
178
|
- !ruby/object:Gem::Version
|
239
|
-
hash: 3
|
240
|
-
segments:
|
241
|
-
- 0
|
242
179
|
version: "0"
|
243
180
|
type: :development
|
244
181
|
version_requirements: *id015
|
245
182
|
- !ruby/object:Gem::Dependency
|
246
|
-
name:
|
183
|
+
name: ruby-debug19
|
247
184
|
prerelease: false
|
248
185
|
requirement: &id016 !ruby/object:Gem::Requirement
|
249
186
|
none: false
|
250
187
|
requirements:
|
251
188
|
- - ">="
|
252
189
|
- !ruby/object:Gem::Version
|
253
|
-
hash: 3
|
254
|
-
segments:
|
255
|
-
- 0
|
256
190
|
version: "0"
|
257
191
|
type: :development
|
258
192
|
version_requirements: *id016
|
259
|
-
- !ruby/object:Gem::Dependency
|
260
|
-
name: ruby-debug
|
261
|
-
prerelease: false
|
262
|
-
requirement: &id017 !ruby/object:Gem::Requirement
|
263
|
-
none: false
|
264
|
-
requirements:
|
265
|
-
- - ">="
|
266
|
-
- !ruby/object:Gem::Version
|
267
|
-
hash: 3
|
268
|
-
segments:
|
269
|
-
- 0
|
270
|
-
version: "0"
|
271
|
-
type: :development
|
272
|
-
version_requirements: *id017
|
273
193
|
description: Gives you a standard way to parse various formats and treat them as an array of hashes.
|
274
194
|
email:
|
275
195
|
- seamus@abshere.net
|
@@ -323,18 +243,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
323
243
|
requirements:
|
324
244
|
- - ">="
|
325
245
|
- !ruby/object:Gem::Version
|
326
|
-
hash: 3
|
327
|
-
segments:
|
328
|
-
- 0
|
329
246
|
version: "0"
|
330
247
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
331
248
|
none: false
|
332
249
|
requirements:
|
333
250
|
- - ">="
|
334
251
|
- !ruby/object:Gem::Version
|
335
|
-
hash: 3
|
336
|
-
segments:
|
337
|
-
- 0
|
338
252
|
version: "0"
|
339
253
|
requirements: []
|
340
254
|
|