jsoner 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/CHANGELOG.md +7 -2
- data/README.md +27 -0
- data/lib/jsoner.rb +6 -3
- data/lib/jsoner/table_factory.rb +1 -1
- data/lib/jsoner/version.rb +1 -1
- data/spec/fixtures/table_extend.rb +46 -0
- data/spec/jsoner/table_factory_spec.rb +23 -0
- data/spec/jsoner_spec.rb +22 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZjUyODljYzQ2MjJhZjdlNTVjM2FkN2EwYjQ1MmMwYzQzZmQ2YTgzYg==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
NDY2NTIxOWFiZjVkNmU1M2M3NmE0OWY2Mjc1MTM2MGZiZTM4OTA5NQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
M2UwZTJjNTQzZDZiZGExNTZmOThlYzA3MDUzMzE2YTY4YWU0Yzk2NmY4ZWY2
|
10
|
+
ZDA1ZTNhOTllZDRhYjZlM2RkZDE2NGMxY2U1MmRkMDNmYzFhZjQ4NDIwYTM5
|
11
|
+
ZWY4NGQxYTdjNzAwOWExNWJjYjczMzA2ZjE1Njg0ZWE3NjZjYTM=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NjQ4YjJmMTk0Y2E4YTdjZDU3ZDcyY2YwNGRlMTlkMTZkNzZlZDM3NTJkMGY2
|
14
|
+
ODY1MjZjNzllODY5ZGQ2M2ZiZmY0MTFhMzNjYTNjOGZjOTUyOTRhOTgyOTY4
|
15
|
+
NTlhMDFjZDc0OGRkOTc3ODg2N2MxNWRkMmY0Nzg2OGFkNDBkNTQ=
|
data/CHANGELOG.md
CHANGED
@@ -3,5 +3,10 @@
|
|
3
3
|
* Get first table element when having more than two in parsed HTML
|
4
4
|
|
5
5
|
## v0.0.3
|
6
|
-
*
|
7
|
-
*
|
6
|
+
* Parse HTML file including HTML Table
|
7
|
+
* Fix handling of files that are empty or have no table element
|
8
|
+
|
9
|
+
## v0.0.4
|
10
|
+
* Add RSpec tests that check the integrity of the HTML table
|
11
|
+
* Supply an exception message for NotFullTable
|
12
|
+
* Parse a link (URL) whose page includes an HTML table
|
data/README.md
CHANGED
@@ -61,7 +61,34 @@ json = Jsoner.parse(html)
|
|
61
61
|
# {"First Name"=>"Adam", "Last Name"=>"Johnson", "Points"=>"67"} ]
|
62
62
|
|
63
63
|
```
|
64
|
+
Or, `Jsoner` can parse an HTML file that contains an HTML table:
|
64
65
|
|
66
|
+
```ruby
|
67
|
+
require 'open-uri'
|
68
|
+
# you must have table.html file and assign file path correctly.
|
69
|
+
|
70
|
+
table = Jsoner.parse(open('table.html'))
|
71
|
+
|
72
|
+
# output json =>
|
73
|
+
#
|
74
|
+
# [ {"First Name"=>"Jill", "Last Name"=>"Smith", "Points"=>"50"},
|
75
|
+
# {"First Name"=>"Eve", "Last Name"=>"Jackson", "Points"=>"94"},
|
76
|
+
# {"First Name"=>"John", "Last Name"=>"Doe", "Points"=>"80"},
|
77
|
+
# {"First Name"=>"Adam", "Last Name"=>"Johnson", "Points"=>"67"} ]
|
78
|
+
|
79
|
+
```
|
80
|
+
|
81
|
+
Or you can parse a table from a page fetched by URL:
|
82
|
+
|
83
|
+
```ruby
|
84
|
+
require 'open-uri'
|
85
|
+
|
86
|
+
table = Jsoner.parse(open('http://www.w3school.com.cn/tiy/t.asp?f=html_table_header'))
|
87
|
+
|
88
|
+
# output json =>
|
89
|
+
#
|
90
|
+
# [{"姓名"=>"Bill Gates", "电话"=>"555 77 855"}]
|
91
|
+
```
|
65
92
|
## THANKS
|
66
93
|
|
67
94
|
[table-to-json](https://github.com/lightswitch05/table-to-json) written by [@lightswitch05](https://github.com/lightswitch05) in JavaScript.
|
data/lib/jsoner.rb
CHANGED
@@ -7,14 +7,17 @@ require 'nokogiri'
|
|
7
7
|
module Jsoner
|
8
8
|
|
9
9
|
class NotFullTable < StandardError
|
10
|
+
def message
|
11
|
+
'Incomplete HTML Table'
|
12
|
+
end
|
10
13
|
end
|
11
14
|
|
12
15
|
class << self
|
13
16
|
|
14
17
|
def parse(html)
|
15
|
-
|
16
|
-
if factory =
|
17
|
-
|
18
|
+
# html = filter(html)
|
19
|
+
if factory = TableFactory.check(Nokogiri::HTML.parse(html))
|
20
|
+
Table.new(factory).to_json
|
18
21
|
end
|
19
22
|
end
|
20
23
|
|
data/lib/jsoner/table_factory.rb
CHANGED
data/lib/jsoner/version.rb
CHANGED
@@ -32,3 +32,49 @@ def table_extend_str
|
|
32
32
|
</body>
|
33
33
|
eohtml
|
34
34
|
end
|
35
|
+
|
36
|
+
def no_td_table
|
37
|
+
<<-eohtml
|
38
|
+
<table id='example-table'>
|
39
|
+
<thead>
|
40
|
+
<tr>
|
41
|
+
<th>First Name</th>
|
42
|
+
<th>Last Name</th>
|
43
|
+
<th data-override="Score">Points</th></tr>
|
44
|
+
</thead>
|
45
|
+
</table>
|
46
|
+
eohtml
|
47
|
+
end
|
48
|
+
|
49
|
+
def no_tr_table
|
50
|
+
<<-eohtml
|
51
|
+
<table id='example-table'>
|
52
|
+
</table>
|
53
|
+
eohtml
|
54
|
+
end
|
55
|
+
|
56
|
+
def no_th_table
|
57
|
+
<<-eohtml
|
58
|
+
<table id='example-table'>
|
59
|
+
<tbody>
|
60
|
+
<tr>
|
61
|
+
<td>Jill</td>
|
62
|
+
<td>Smith</td>
|
63
|
+
<td data-override="disqualified">50</td></tr>
|
64
|
+
<tr>
|
65
|
+
<td>Eve</td>
|
66
|
+
<td>Jackson</td>
|
67
|
+
<td>94</td></tr>
|
68
|
+
<tr>
|
69
|
+
<td>John</td>
|
70
|
+
<td>Doe</td>
|
71
|
+
<td>80</td></tr>
|
72
|
+
<tr>
|
73
|
+
<td>Adam</td>
|
74
|
+
<td>Johnson</td>
|
75
|
+
<td>67</td></tr>
|
76
|
+
</tbody>
|
77
|
+
</table>
|
78
|
+
eohtml
|
79
|
+
end
|
80
|
+
|
@@ -52,6 +52,29 @@ describe 'build Hash below from doc parsed by Nokogiki' do
|
|
52
52
|
# TODO testing when having no header in table
|
53
53
|
end
|
54
54
|
|
55
|
+
context "check integrity of HTML table" do
|
56
|
+
|
57
|
+
it "when having no tr element" do
|
58
|
+
doc = Nokogiri::HTML.parse(no_tr_table)
|
59
|
+
expect{ Jsoner::TableFactory.check(doc) }.to raise_error Jsoner::NotFullTable
|
60
|
+
end
|
61
|
+
|
62
|
+
it "when having no td element" do
|
63
|
+
doc = Nokogiri::HTML.parse(no_td_table)
|
64
|
+
expect{ Jsoner::TableFactory.check(doc) }.to raise_error Jsoner::NotFullTable
|
65
|
+
end
|
66
|
+
|
67
|
+
it "when having no th element" do
|
68
|
+
doc = Nokogiri::HTML.parse(no_th_table)
|
69
|
+
expect{ Jsoner::TableFactory.check(doc) }.to raise_error Jsoner::NotFullTable
|
70
|
+
end
|
71
|
+
|
72
|
+
it "should return factory if integrity" do
|
73
|
+
doc = Nokogiri::HTML.parse(table_str)
|
74
|
+
Jsoner::TableFactory.check(doc).should be_instance_of Jsoner::TableFactory
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
55
78
|
context "filter HTML" do
|
56
79
|
|
57
80
|
before :each do
|
data/spec/jsoner_spec.rb
CHANGED
@@ -1,6 +1,10 @@
|
|
1
|
+
# encoding: utf-8
|
1
2
|
Dir["#{File.dirname(__FILE__)}/fixtures/*.rb"].each {|file| require file }
|
2
3
|
require "#{File.dirname(__FILE__)}/../lib/jsoner"
|
3
4
|
|
5
|
+
require 'open-uri'
|
6
|
+
require 'json'
|
7
|
+
|
4
8
|
describe Jsoner do
|
5
9
|
|
6
10
|
it "should match data from json of fixtures" do
|
@@ -8,10 +12,26 @@ describe Jsoner do
|
|
8
12
|
end
|
9
13
|
|
10
14
|
it "should parse file including table" do
|
11
|
-
Jsoner.parse("#{File.dirname(__FILE__)}/fixtures/table.html").should == json
|
15
|
+
Jsoner.parse(open("#{File.dirname(__FILE__)}/fixtures/table.html")).should == json
|
12
16
|
end
|
13
17
|
|
14
18
|
it "should raise error when having no full table in HTML file" do
|
15
|
-
expect{ Jsoner.parse("#{File.dirname(__FILE__)}/fixtures/table_extend.html")}.to raise_error Jsoner::NotFullTable
|
19
|
+
expect{ Jsoner.parse(open("#{File.dirname(__FILE__)}/fixtures/table_extend.html"))}.to raise_error Jsoner::NotFullTable
|
20
|
+
end
|
21
|
+
|
22
|
+
# http://www.w3school.com.cn/tiy/t.asp?f=html_table_headers
|
23
|
+
it "should parse Link include table" do
|
24
|
+
json = Jsoner.parse(open("http://www.w3school.com.cn/tiy/t.asp?f=html_table_headers"))
|
25
|
+
JSON.parse(json).should == [{"姓名"=>"Bill Gates", "电话"=>"555 77 855"}]
|
26
|
+
end
|
27
|
+
|
28
|
+
context "data parsed" do
|
29
|
+
it "it is HTML file" do
|
30
|
+
Jsoner.filter(open("#{File.dirname(__FILE__)}/fixtures/table.html")).should == table_str
|
31
|
+
end
|
32
|
+
|
33
|
+
it "it is String" do
|
34
|
+
Jsoner.filter("<table></table>").should == "<table></table>"
|
35
|
+
end
|
16
36
|
end
|
17
37
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jsoner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- simlegate
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-08-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|