csv_from_table 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +2 -0
- data/Gemfile.lock +19 -0
- data/lib/csv_from_table.rb +65 -0
- data/specs/csv_from_table_spec.rb +60 -0
- data/specs/sample_table.html +47 -0
- metadata +52 -0
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
GEM
|
2
|
+
specs:
|
3
|
+
diff-lcs (1.1.3)
|
4
|
+
nokogiri (1.5.0)
|
5
|
+
rspec (2.6.0)
|
6
|
+
rspec-core (~> 2.6.0)
|
7
|
+
rspec-expectations (~> 2.6.0)
|
8
|
+
rspec-mocks (~> 2.6.0)
|
9
|
+
rspec-core (2.6.4)
|
10
|
+
rspec-expectations (2.6.0)
|
11
|
+
diff-lcs (~> 1.1.2)
|
12
|
+
rspec-mocks (2.6.0)
|
13
|
+
|
14
|
+
PLATFORMS
|
15
|
+
ruby
|
16
|
+
|
17
|
+
DEPENDENCIES
|
18
|
+
nokogiri
|
19
|
+
rspec
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
Bundler.require
|
4
|
+
|
5
|
+
class String

  # Treats this string as HTML and returns the text of its table cells as
  # comma-separated text.
  #
  # options - Hash passed through unchanged to CSVFromTable#ar_from_table
  #           (it reads the :only and :except keys to filter columns).
  #
  # Returns the String produced by CSVFromTable#ar_to_csv.
  # Raises TableNotFoundInString when the string contains no <table>.
  def csv_from_table(options={})
    # Keep the converter in a local: assigning an instance variable on the
    # receiver fails on frozen strings and leaves stray state on every
    # string this method is called on.
    converter = CSVFromTable.new
    rows = converter.ar_from_table(self, options)
    converter.ar_to_csv(rows)
  end

end
|
14
|
+
|
15
|
+
# Extracts the <td> cell text of an HTML table and renders it as
# comma-separated text.
class CSVFromTable

  # Parses +table_string+ as HTML and collects the text of every <td>,
  # grouped by the <tr> that contains it.
  #
  # table_string - String of HTML that must contain a <table> element.
  # options      - Hash of column filters, matched against the <th> texts:
  #                :only   - header name or Array of names to keep.
  #                :except - header name or Array of names to drop
  #                          (ignored when :only is given).
  #
  # Returns an Array of rows, each an Array of cell Strings. Rows that end
  # up with no cells (for example a header row made only of <th>) are
  # dropped.
  # Raises TableNotFoundInString when the document has no <table>.
  def ar_from_table(table_string, options={})
    @options = options
    doc = Nokogiri::HTML(table_string)
    if doc.css("table").empty?
      raise TableNotFoundInString, "The string dosen't contain a valid <table> element."
    end

    # Reuse the parsed document instead of parsing the string a second time.
    @headers = headers_in(doc)

    rows = doc.css("tr").map do |row|
      cells = []
      row.css("td").each_with_index do |cell, index|
        cells << cell.text if include_colunm?(index)
      end
      cells
    end
    rows.reject { |cells| cells.empty? }
  end

  # Tells whether the column at +index+ passes the :only / :except filter
  # given to ar_from_table.
  #
  # index - Integer position of the cell within its row; looked up in the
  #         header list collected by ar_from_table.
  #
  # Returns true or false.
  def include_colunm?(index)
    header = @headers[index]
    # Array() lets a single name and a list of names share one exact-match
    # test. String#include? would match substrings ("Price" inside
    # "Price per kg") and raise when there is no header for this index.
    if @options[:only]
      Array(@options[:only]).include?(header)
    elsif @options[:except]
      !Array(@options[:except]).include?(header)
    else
      true
    end
  end

  # Renders rows as text: cells joined by ", " and rows joined by "\n".
  # Cells are written verbatim; nothing is quoted or escaped, so a cell
  # that itself contains a comma is not distinguishable in the output.
  #
  # array - Array of rows, each an Array of Strings.
  #
  # Returns a String (empty when +array+ is empty).
  def ar_to_csv(array)
    array.map { |row| row.join(", ") }.join("\n")
  end

  # Parses +table_string+ as HTML and returns the text of every <th>
  # element, in document order.
  #
  # table_string - String of HTML.
  #
  # Returns an Array of Strings.
  def table_headers(table_string)
    headers_in(Nokogiri::HTML(table_string))
  end

  private

  # Collects the text of every <th> in an already parsed document.
  def headers_in(doc)
    doc.css("th").map { |node| node.text }
  end

end
|
64
|
+
|
65
|
+
# Error raised by CSVFromTable#ar_from_table when the parsed string
# contains no <table> element.
class TableNotFoundInString < StandardError
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
$LOAD_PATH << '../lib'
|
2
|
+
require 'csv_from_table'
|
3
|
+
|
4
|
+
describe CSVFromTable do

  before do
    # File.read closes the file for us, and resolving the path against this
    # spec file lets the suite run from any working directory.
    @sample = File.read(File.expand_path("../sample_table.html", __FILE__))
    @csv_from_table = CSVFromTable.new
  end

  it "should receive a HTML string with a table and return a array of arrays with the table's content" do
    rows = @csv_from_table.ar_from_table(@sample)
    rows.length.should == 5
    rows.should == [["Baseball bat", "$75.99", "500g", "Probably", "20%"],
                    ["Harmonica", "R$ 150,00", "100g", "Sure!", "0%"],
                    ["Pencil", "$1", "80g", "Just if there's an eraser", "100%"],
                    ["Eraser", "R$ 2,00", "120g", "Sure", "80%"],
                    ["Chocolate", "R$ 56", "5 Kg", "Sure", "0%"]]
  end

  it "should turn a array of arrays into CSV formated string" do
    sample_ar = @csv_from_table.ar_from_table(@sample)
    @csv_from_table.ar_to_csv(sample_ar).should == "Baseball bat, $75.99, 500g, Probably, 20%\nHarmonica, R$ 150,00, 100g, Sure!, 0%\nPencil, $1, 80g, Just if there's an eraser, 100%\nEraser, R$ 2,00, 120g, Sure, 80%\nChocolate, R$ 56, 5 Kg, Sure, 0%"
  end

end
|
26
|
+
|
27
|
+
describe String do

  before do
    # File.read closes the file for us, and resolving the path against this
    # spec file lets the suite run from any working directory.
    @sample_html_table = File.read(File.expand_path("../sample_table.html", __FILE__))
  end

  it "should return the CSV format from a table" do
    @sample_html_table.csv_from_table.should == "Baseball bat, $75.99, 500g, Probably, 20%\nHarmonica, R$ 150,00, 100g, Sure!, 0%\nPencil, $1, 80g, Just if there's an eraser, 100%\nEraser, R$ 2,00, 120g, Sure, 80%\nChocolate, R$ 56, 5 Kg, Sure, 0%"
  end

  it "should raise error when there's no table on the given string" do
    sample_html_table = "No html table here!"
    lambda { sample_html_table.csv_from_table }.should(raise_error(TableNotFoundInString))
  end

  describe "should receive options to filter colunms" do
    it ":only => 'colunm name'" do
      @sample_html_table.csv_from_table(:only => "Thing").should == "Baseball bat\nHarmonica\nPencil\nEraser\nChocolate"
    end

    it ":only => 'colunm array'" do
      @sample_html_table.csv_from_table(:only => ["Thing", "Weight"]).should == "Baseball bat, 500g\nHarmonica, 100g\nPencil, 80g\nEraser, 120g\nChocolate, 5 Kg"
    end

    it ":except => 'colunm name'" do
      @sample_html_table.csv_from_table(:except => "Price").should == "Baseball bat, 500g, Probably, 20%\nHarmonica, 100g, Sure!, 0%\nPencil, 80g, Just if there's an eraser, 100%\nEraser, 120g, Sure, 80%\nChocolate, 5 Kg, Sure, 0%"
    end

    # Named after the array form so the two :except examples can be told
    # apart in the spec output.
    it ":except => 'colunm array'" do
      @sample_html_table.csv_from_table(:except => ["Price", "Is it worthy?", "Chances I forget it there"]).should == "Baseball bat, 500g\nHarmonica, 100g\nPencil, 80g\nEraser, 120g\nChocolate, 5 Kg"
    end
  end

end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
<h2>What if there's more markup than the table?</h2>
|
2
|
+
<p>Extra texts shouldn't get into the CSV version of the table.</p>
|
3
|
+
<table>
|
4
|
+
<caption>Table of thing I would take with me in a trip to the moon!</caption>
|
5
|
+
<tr>
|
6
|
+
<th>Thing</th>
|
7
|
+
<th>Price</th>
|
8
|
+
<th>Weight</th>
|
9
|
+
<th>Is it worthy?</th>
|
10
|
+
<th>Chances I forget it there</th>
|
11
|
+
</tr>
|
12
|
+
<tr>
|
13
|
+
<td>Baseball bat</td>
|
14
|
+
<td>$75.99</td>
|
15
|
+
<td>500g</td>
|
16
|
+
<td>Probably</td>
|
17
|
+
<td>20%</td>
|
18
|
+
</tr>
|
19
|
+
<tr>
|
20
|
+
<td>Harmonica</td>
|
21
|
+
<td>R$ 150,00</td>
|
22
|
+
<td>100g</td>
|
23
|
+
<td>Sure!</td>
|
24
|
+
<td>0%</td>
|
25
|
+
</tr>
|
26
|
+
<tr>Text like this shouldn't be in the CSV
|
27
|
+
<td>Pencil</td>
|
28
|
+
<td>$1</td>
|
29
|
+
<td>80g</td>
|
30
|
+
<td>Just if there's an eraser</td>
|
31
|
+
<td>100%</td>
|
32
|
+
</tr>
|
33
|
+
<tr>
|
34
|
+
<td>Eraser</td>
|
35
|
+
<td>R$ 2,00</td>
|
36
|
+
<td>120g</td>
|
37
|
+
<td>Sure</td>
|
38
|
+
<td>80%</td>
|
39
|
+
</tr>
|
40
|
+
<tr>
|
41
|
+
<td>Chocolate</td>
|
42
|
+
<td>R$ 56</td>
|
43
|
+
<td>5 Kg</td>
|
44
|
+
<td>Sure</td>
|
45
|
+
<td>0%</td>
|
46
|
+
</tr>
|
47
|
+
</table>
|
metadata
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: csv_from_table
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Marco Antonio Fogaça Nogueira
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-12-24 00:00:00.000000000Z
|
13
|
+
dependencies: []
|
14
|
+
description: Ruby library that spits out CSV from given HTML table, so you can use
|
15
|
+
the view helpers when formatting the CSV without having to include them in your models
|
16
|
+
or controllers.
|
17
|
+
email:
|
18
|
+
- marcofognog@gmail.com
|
19
|
+
executables: []
|
20
|
+
extensions: []
|
21
|
+
extra_rdoc_files: []
|
22
|
+
files:
|
23
|
+
- Gemfile
|
24
|
+
- Gemfile.lock
|
25
|
+
- lib/csv_from_table.rb
|
26
|
+
- specs/csv_from_table_spec.rb
|
27
|
+
- specs/sample_table.html
|
28
|
+
homepage: http://github.com/marcofognog/csv_from_table
|
29
|
+
licenses: []
|
30
|
+
post_install_message:
|
31
|
+
rdoc_options: []
|
32
|
+
require_paths:
|
33
|
+
- lib
|
34
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
35
|
+
none: false
|
36
|
+
requirements:
|
37
|
+
- - ! '>='
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 1.3.7
|
46
|
+
requirements: []
|
47
|
+
rubyforge_project:
|
48
|
+
rubygems_version: 1.8.6
|
49
|
+
signing_key:
|
50
|
+
specification_version: 3
|
51
|
+
summary: Ruby library that spits out CSV from given HTML table.
|
52
|
+
test_files: []
|