jsoner 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/CHANGELOG.md +7 -2
- data/README.md +27 -0
- data/lib/jsoner.rb +6 -3
- data/lib/jsoner/table_factory.rb +1 -1
- data/lib/jsoner/version.rb +1 -1
- data/spec/fixtures/table_extend.rb +46 -0
- data/spec/jsoner/table_factory_spec.rb +23 -0
- data/spec/jsoner_spec.rb +22 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZjUyODljYzQ2MjJhZjdlNTVjM2FkN2EwYjQ1MmMwYzQzZmQ2YTgzYg==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
NDY2NTIxOWFiZjVkNmU1M2M3NmE0OWY2Mjc1MTM2MGZiZTM4OTA5NQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
M2UwZTJjNTQzZDZiZGExNTZmOThlYzA3MDUzMzE2YTY4YWU0Yzk2NmY4ZWY2
|
10
|
+
ZDA1ZTNhOTllZDRhYjZlM2RkZDE2NGMxY2U1MmRkMDNmYzFhZjQ4NDIwYTM5
|
11
|
+
ZWY4NGQxYTdjNzAwOWExNWJjYjczMzA2ZjE1Njg0ZWE3NjZjYTM=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NjQ4YjJmMTk0Y2E4YTdjZDU3ZDcyY2YwNGRlMTlkMTZkNzZlZDM3NTJkMGY2
|
14
|
+
ODY1MjZjNzllODY5ZGQ2M2ZiZmY0MTFhMzNjYTNjOGZjOTUyOTRhOTgyOTY4
|
15
|
+
NTlhMDFjZDc0OGRkOTc3ODg2N2MxNWRkMmY0Nzg2OGFkNDBkNTQ=
|
data/CHANGELOG.md
CHANGED
@@ -3,5 +3,10 @@
|
|
3
3
|
* Get first table element when having more than two in parsed HTML
|
4
4
|
|
5
5
|
## v0.0.3
|
6
|
-
*
|
7
|
-
*
|
6
|
+
* Parse HTML file including HTML Table
|
7
|
+
* Fix handling of files that are empty or have no table element
|
8
|
+
|
9
|
+
## v0.0.4
|
10
|
+
* Add RSpec tests that check the integrity of the HTML table
|
11
|
+
* Supply an exception message for NotFullTable
|
12
|
+
* Parse a link (URL) whose page contains an HTML table
|
data/README.md
CHANGED
@@ -61,7 +61,34 @@ json = Jsoner.parse(html)
|
|
61
61
|
# {"First Name"=>"Adam", "Last Name"=>"Johnson", "Points"=>"67"} ]
|
62
62
|
|
63
63
|
```
|
64
|
+
Or, `Jsoner` can parse an HTML file that contains an HTML table:
|
64
65
|
|
66
|
+
```ruby
|
67
|
+
require 'open-uri'
|
68
|
+
# You must have a table.html file and pass its path correctly.
|
69
|
+
|
70
|
+
table = Jsoner.parse(open('table.html'))
|
71
|
+
|
72
|
+
# output json =>
|
73
|
+
#
|
74
|
+
# [ {"First Name"=>"Jill", "Last Name"=>"Smith", "Points"=>"50"},
|
75
|
+
# {"First Name"=>"Eve", "Last Name"=>"Jackson", "Points"=>"94"},
|
76
|
+
# {"First Name"=>"John", "Last Name"=>"Doe", "Points"=>"80"},
|
77
|
+
# {"First Name"=>"Adam", "Last Name"=>"Johnson", "Points"=>"67"} ]
|
78
|
+
|
79
|
+
```
|
80
|
+
|
81
|
+
Or you can open a link (URL):
|
82
|
+
|
83
|
+
```ruby
|
84
|
+
require 'open-uri'
|
85
|
+
|
86
|
+
table = Jsoner.parse(open('http://www.w3school.com.cn/tiy/t.asp?f=html_table_header'))
|
87
|
+
|
88
|
+
# output json =>
|
89
|
+
#
|
90
|
+
# [{"姓名"=>"Bill Gates", "电话"=>"555 77 855"}]
|
91
|
+
```
|
65
92
|
## THANKS
|
66
93
|
|
67
94
|
[table-to-json](https://github.com/lightswitch05/table-to-json) written by [@lightswitch05](https://github.com/lightswitch05) in JavaScript.
|
data/lib/jsoner.rb
CHANGED
@@ -7,14 +7,17 @@ require 'nokogiri'
|
|
7
7
|
module Jsoner
|
8
8
|
|
9
9
|
class NotFullTable < StandardError
|
10
|
+
def message
|
11
|
+
'Incomplete HTML Table'
|
12
|
+
end
|
10
13
|
end
|
11
14
|
|
12
15
|
class << self
|
13
16
|
|
14
17
|
def parse(html)
|
15
|
-
|
16
|
-
if factory =
|
17
|
-
|
18
|
+
# html = filter(html)
|
19
|
+
if factory = TableFactory.check(Nokogiri::HTML.parse(html))
|
20
|
+
Table.new(factory).to_json
|
18
21
|
end
|
19
22
|
end
|
20
23
|
|
data/lib/jsoner/table_factory.rb
CHANGED
data/lib/jsoner/version.rb
CHANGED
data/spec/fixtures/table_extend.rb
CHANGED
@@ -32,3 +32,49 @@ def table_extend_str
|
|
32
32
|
</body>
|
33
33
|
eohtml
|
34
34
|
end
|
35
|
+
|
36
|
+
def no_td_table
|
37
|
+
<<-eohtml
|
38
|
+
<table id='example-table'>
|
39
|
+
<thead>
|
40
|
+
<tr>
|
41
|
+
<th>First Name</th>
|
42
|
+
<th>Last Name</th>
|
43
|
+
<th data-override="Score">Points</th></tr>
|
44
|
+
</thead>
|
45
|
+
</table>
|
46
|
+
eohtml
|
47
|
+
end
|
48
|
+
|
49
|
+
def no_tr_table
|
50
|
+
<<-eohtml
|
51
|
+
<table id='example-table'>
|
52
|
+
</table>
|
53
|
+
eohtml
|
54
|
+
end
|
55
|
+
|
56
|
+
def no_th_table
|
57
|
+
<<-eohtml
|
58
|
+
<table id='example-table'>
|
59
|
+
<tbody>
|
60
|
+
<tr>
|
61
|
+
<td>Jill</td>
|
62
|
+
<td>Smith</td>
|
63
|
+
<td data-override="disqualified">50</td></tr>
|
64
|
+
<tr>
|
65
|
+
<td>Eve</td>
|
66
|
+
<td>Jackson</td>
|
67
|
+
<td>94</td></tr>
|
68
|
+
<tr>
|
69
|
+
<td>John</td>
|
70
|
+
<td>Doe</td>
|
71
|
+
<td>80</td></tr>
|
72
|
+
<tr>
|
73
|
+
<td>Adam</td>
|
74
|
+
<td>Johnson</td>
|
75
|
+
<td>67</td></tr>
|
76
|
+
</tbody>
|
77
|
+
</table>
|
78
|
+
eohtml
|
79
|
+
end
|
80
|
+
|
data/spec/jsoner/table_factory_spec.rb
CHANGED
@@ -52,6 +52,29 @@ describe 'build Hash below from doc parsed by Nokogiki' do
|
|
52
52
|
# TODO testing when having no header in table
|
53
53
|
end
|
54
54
|
|
55
|
+
context "check integrity of HTML table" do
|
56
|
+
|
57
|
+
it "when having no tr element" do
|
58
|
+
doc = Nokogiri::HTML.parse(no_tr_table)
|
59
|
+
expect{ Jsoner::TableFactory.check(doc) }.to raise_error Jsoner::NotFullTable
|
60
|
+
end
|
61
|
+
|
62
|
+
it "when having no td element" do
|
63
|
+
doc = Nokogiri::HTML.parse(no_td_table)
|
64
|
+
expect{ Jsoner::TableFactory.check(doc) }.to raise_error Jsoner::NotFullTable
|
65
|
+
end
|
66
|
+
|
67
|
+
it "when having no th element" do
|
68
|
+
doc = Nokogiri::HTML.parse(no_th_table)
|
69
|
+
expect{ Jsoner::TableFactory.check(doc) }.to raise_error Jsoner::NotFullTable
|
70
|
+
end
|
71
|
+
|
72
|
+
it "should return factory if integrity" do
|
73
|
+
doc = Nokogiri::HTML.parse(table_str)
|
74
|
+
Jsoner::TableFactory.check(doc).should be_instance_of Jsoner::TableFactory
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
55
78
|
context "filter HTML" do
|
56
79
|
|
57
80
|
before :each do
|
data/spec/jsoner_spec.rb
CHANGED
@@ -1,6 +1,10 @@
|
|
1
|
+
# encoding: utf-8
|
1
2
|
Dir["#{File.dirname(__FILE__)}/fixtures/*.rb"].each {|file| require file }
|
2
3
|
require "#{File.dirname(__FILE__)}/../lib/jsoner"
|
3
4
|
|
5
|
+
require 'open-uri'
|
6
|
+
require 'json'
|
7
|
+
|
4
8
|
describe Jsoner do
|
5
9
|
|
6
10
|
it "should match data from json of fixtures" do
|
@@ -8,10 +12,26 @@ describe Jsoner do
|
|
8
12
|
end
|
9
13
|
|
10
14
|
it "should parse file including table" do
|
11
|
-
Jsoner.parse("#{File.dirname(__FILE__)}/fixtures/table.html").should == json
|
15
|
+
Jsoner.parse(open("#{File.dirname(__FILE__)}/fixtures/table.html")).should == json
|
12
16
|
end
|
13
17
|
|
14
18
|
it "should raise error when having no full table in HTML file" do
|
15
|
-
expect{ Jsoner.parse("#{File.dirname(__FILE__)}/fixtures/table_extend.html")}.to raise_error Jsoner::NotFullTable
|
19
|
+
expect{ Jsoner.parse(open("#{File.dirname(__FILE__)}/fixtures/table_extend.html"))}.to raise_error Jsoner::NotFullTable
|
20
|
+
end
|
21
|
+
|
22
|
+
# http://www.w3school.com.cn/tiy/t.asp?f=html_table_headers
|
23
|
+
it "should parse Link include table" do
|
24
|
+
json = Jsoner.parse(open("http://www.w3school.com.cn/tiy/t.asp?f=html_table_headers"))
|
25
|
+
JSON.parse(json).should == [{"姓名"=>"Bill Gates", "电话"=>"555 77 855"}]
|
26
|
+
end
|
27
|
+
|
28
|
+
context "data parsed" do
|
29
|
+
it "it is HTML file" do
|
30
|
+
Jsoner.filter(open("#{File.dirname(__FILE__)}/fixtures/table.html")).should == table_str
|
31
|
+
end
|
32
|
+
|
33
|
+
it "it is String" do
|
34
|
+
Jsoner.filter("<table></table>").should == "<table></table>"
|
35
|
+
end
|
16
36
|
end
|
17
37
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jsoner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- simlegate
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-08-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|