mine_shaft 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +3 -0
- data/LICENSE +20 -0
- data/README.textile +93 -0
- data/Rakefile +9 -0
- data/lib/mine_shaft.rb +13 -0
- data/lib/mine_shaft/errors.rb +5 -0
- data/lib/mine_shaft/html_table.rb +101 -0
- data/lib/mine_shaft/login_page.rb +51 -0
- data/lib/mine_shaft/shaft.rb +49 -0
- data/lib/mine_shaft/user_agent.rb +109 -0
- data/lib/mine_shaft/version.rb +3 -0
- data/lib/mine_shaft/web_page.rb +31 -0
- data/spec/fixtures/failed_login.html +118 -0
- data/spec/fixtures/home_page.html +3 -0
- data/spec/fixtures/login.html +117 -0
- data/spec/fixtures/multiple_tables.html +212 -0
- data/spec/fixtures/projects_table.html +16 -0
- data/spec/fixtures/wiki_page_with_multiple_tables.html +193 -0
- data/spec/fixtures/wiki_page_with_no_table_id.html +176 -0
- data/spec/fixtures/wiki_page_with_projects_table.html +176 -0
- data/spec/mine_shaft/html_table_spec.rb +60 -0
- data/spec/mine_shaft/shaft_spec.rb +56 -0
- data/spec/mine_shaft/user_agent_spec.rb +62 -0
- data/spec/mine_shaft/web_page_spec.rb +25 -0
- data/spec/spec.opts +4 -0
- data/spec/spec_helper.rb +8 -0
- data/spec/support/mine_shaft_helpers.rb +9 -0
- metadata +169 -0
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'mine_shaft'
|
3
|
+
include MineShaft
|
4
|
+
|
5
|
+
# Specs for HTMLTable, driven by the 'projects_table.html' fixture.
# Per the expectations below, that fixture holds a Name/Owner heading row
# followed by two content rows (six <td> cells in total).
describe HTMLTable do
  let(:raw_table) { Nokogiri::HTML(File.read(fixture('projects_table.html'))) }
  let(:table) { HTMLTable.new(raw_table) }

  describe "#deserialize" do
    # The result is indexed with `.first` and each element answers `keys`,
    # i.e. it is a collection of Hashes (one per content row), not a Hash.
    it "returns an Array with one Hash per content row (non-headings)" do
      table.deserialize.should have(2).items
    end

    it "downcases & symbolizes the headings and uses them as the Hash keys" do
      row_keys = table.deserialize.first.keys

      row_keys.should have(2).items
      row_keys.should include(:name)
      row_keys.should include(:owner)
    end

    it "maps the associated value for each column to the heading hash key" do
      first_row = table.deserialize.first

      first_row[:name].should == "Sunbeam"
      first_row[:owner].should == "John Doe"
    end
  end

  describe "#content_rows" do
    it "returns an array with the same number of rows as the table (minus the heading)" do
      table.content_rows.should have(2).items
    end

    it "does not include the content from the header" do
      cells = table.content_rows.flatten

      cells.should_not include("Name")
      cells.should_not include("Owner")
    end

    it "does include the <td> content for non-header rows" do
      table.content_rows.should include(["Sunbeam", "John Doe"])
      table.content_rows.should include(["Truby", "Jane Doe"])
    end
  end

  describe "#td_elements" do
    it "returns an array of all the <td> elements in the table" do
      table.should have(6).td_elements
    end

    # Heading cells are expected here too ("Name"), unlike #content_rows.
    it "returns the textual content in the <td> elements" do
      table.td_elements.should include("Sunbeam")
      table.td_elements.should include("Name")
      table.td_elements.should include("John Doe")
    end
  end

  describe "#headings" do
    it "assumes the first row of <td>'s in the table is the header" do
      table.headings.should == %w(Name Owner)
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require "mine_shaft"
|
3
|
+
|
4
|
+
include MineShaft
|
5
|
+
|
6
|
+
# Specs for Shaft#grab. Authentication is stubbed out and wiki pages are
# served from fixtures through FakeWeb, so no real Redmine instance is hit.
describe Shaft do
  let(:base_uri) { 'http://redmine.example.com' }
  let(:shaft) { Shaft.new('username', 'password', base_uri) }

  before(:each) do
    # Hand Shaft a real UserAgent whose log_in is stubbed to succeed, so the
    # examples only exercise page fetching and table extraction.
    agent = UserAgent.new('username', 'password', base_uri)
    agent.stub(:log_in).and_return(true)
    UserAgent.stub(:new).with('username', 'password', base_uri).and_return(agent)
  end

  describe "#grab" do
    let(:wiki_page) { '/projects/test/wiki/Wiki' }

    context "when the specified table id does not exist on the page" do
      before(:each) do
        FakeWeb.register_uri(:get,
                             "#{base_uri}#{wiki_page}",
                             :body => fixture('wiki_page_with_no_table_id.html'),
                             :content_type => 'text/html')
      end

      it "raises an InvalidPage exception" do
        expect {
          shaft.grab('my-table-id', wiki_page)
        }.to raise_error(InvalidPage)
      end
    end

    # This context previously repeated the "does not exist" description even
    # though it serves the fixture that contains the 'projects' table.
    context "when the specified table exists on the page" do
      before(:each) do
        FakeWeb.register_uri(:get,
                             "#{base_uri}#{wiki_page}",
                             :body => fixture('wiki_page_with_projects_table.html'),
                             :content_type => 'text/html')
      end

      it "returns an array with the same number of elements as are in the table (minus header)" do
        shaft.grab('projects', wiki_page).should have(2).items
      end

      it "returns elements with key/value pairs of heading-name/row-value for each row" do
        results = shaft.grab('projects', wiki_page)

        results.first[:name].should == "Sunbeam"
        results.first[:owner].should == "John Doe"
        results.last[:name].should == "Truby"
        results.last[:owner].should == "Jane Doe"
      end
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'mine_shaft'
|
3
|
+
include MineShaft
|
4
|
+
|
5
|
+
# Specs for UserAgent#log_in. Every HTTP exchange is faked with FakeWeb;
# each context supplies its own `response` hash for the request under test.
describe UserAgent do
  let(:base_uri) { 'http://redmine.example.com' }
  let(:login_page) { '/login' }
  let(:agent) { UserAgent.new('username', 'password', base_uri) }

  describe "#log_in" do
    # Default: GET /login answers with whatever `response` the context defines.
    before(:each) do
      FakeWeb.register_uri(:get, "#{base_uri}#{login_page}", response)
    end

    context "when it receives a 404 response code" do
      let(:response) { {:body => "<html></html>", :content_type => 'text/html', :status => [404, "Not Found"]} }

      it "raises an InvalidPage error" do
        expect { agent.log_in }.to raise_error(InvalidPage)
      end
    end

    context "when it does not find the login form" do
      let(:response) { {:body => "<html></html>", :content_type => 'text/html'} }

      it "raises an InvalidPage exception" do
        expect { agent.log_in }.to raise_error(InvalidPage)
      end
    end

    context "when it finds the login form" do
      # Runs after the outer hook and re-registers GET /login with the login
      # form fixture; in the nested contexts `response` describes the POST
      # reply instead.
      # NOTE(review): relies on FakeWeb letting a later registration for the
      # same method/URI take precedence -- confirm against FakeWeb docs.
      before(:each) do
        login_response = {:body => fixture('login.html'), :content_type => 'text/html'}
        FakeWeb.register_uri(:get, "#{base_uri}#{login_page}", login_response)
      end

      context "and a successful login occurs" do
        let(:response) { {:body => fixture('home_page.html'), :content_type => 'text/html'} }

        before(:each) do
          FakeWeb.register_uri(:post, "#{base_uri}#{login_page}", response)
        end

        it "returns true" do
          agent.log_in.should be_true
        end
      end

      context "and the login fails (likely due to username/password)" do
        let(:response) { {:body => fixture('failed_login.html'), :content_type => 'text/html'} }

        before(:each) do
          FakeWeb.register_uri(:post, "#{base_uri}#{login_page}", response)
        end

        it "raises a FailedLogin exception" do
          expect { agent.log_in }.to raise_error(FailedLogin)
        end
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'mine_shaft'
|
3
|
+
include MineShaft
|
4
|
+
|
5
|
+
# Specs for WebPage#find_table, using Nokogiri-parsed wiki page fixtures.
describe WebPage do
  describe "#find_table" do
    # Each context supplies `parsed_page` from a different fixture.
    let(:page) { WebPage.new(parsed_page) }

    context "when the specified table id does not exist on the page" do
      let(:parsed_page) { Nokogiri::HTML(File.read(fixture('wiki_page_with_no_table_id.html'))) }

      it "raises an InvalidPage exception" do
        expect { page.find_table('my-table-id') }.to raise_error(InvalidPage)
      end
    end

    context "when the specified table exists on the page" do
      let(:parsed_page) { Nokogiri::HTML(File.read(fixture('wiki_page_with_projects_table.html'))) }

      it "returns an HTMLTable" do
        page.find_table('projects').should be_instance_of(HTMLTable)
      end
    end
  end
end
|
data/spec/spec.opts
ADDED
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,169 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mine_shaft
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Tom Kersten
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-11-27 00:00:00 -06:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: mechanize
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 23
|
30
|
+
segments:
|
31
|
+
- 1
|
32
|
+
- 0
|
33
|
+
- 0
|
34
|
+
version: 1.0.0
|
35
|
+
type: :runtime
|
36
|
+
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: rake
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 3
|
46
|
+
segments:
|
47
|
+
- 0
|
48
|
+
version: "0"
|
49
|
+
type: :development
|
50
|
+
version_requirements: *id002
|
51
|
+
- !ruby/object:Gem::Dependency
|
52
|
+
name: rspec
|
53
|
+
prerelease: false
|
54
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
55
|
+
none: false
|
56
|
+
requirements:
|
57
|
+
- - ~>
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
hash: 11
|
60
|
+
segments:
|
61
|
+
- 2
|
62
|
+
- 1
|
63
|
+
- 0
|
64
|
+
version: 2.1.0
|
65
|
+
type: :development
|
66
|
+
version_requirements: *id003
|
67
|
+
- !ruby/object:Gem::Dependency
|
68
|
+
name: fakeweb
|
69
|
+
prerelease: false
|
70
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ~>
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
hash: 27
|
76
|
+
segments:
|
77
|
+
- 1
|
78
|
+
- 3
|
79
|
+
- 0
|
80
|
+
version: 1.3.0
|
81
|
+
type: :development
|
82
|
+
version_requirements: *id004
|
83
|
+
description: Provides an easy way to authenticate and grab content from a Redmine project. A specific use-case was to deserialize HTML tables from the wiki into an Array of Hashes with key/value pairs of "table heading" -> "column value" in order to simplify collection of a seed data from domain experts.
|
84
|
+
email:
|
85
|
+
- tkersten@gnresound.com
|
86
|
+
executables: []
|
87
|
+
|
88
|
+
extensions: []
|
89
|
+
|
90
|
+
extra_rdoc_files: []
|
91
|
+
|
92
|
+
files:
|
93
|
+
- spec/mine_shaft/html_table_spec.rb
|
94
|
+
- spec/mine_shaft/shaft_spec.rb
|
95
|
+
- spec/mine_shaft/user_agent_spec.rb
|
96
|
+
- spec/mine_shaft/web_page_spec.rb
|
97
|
+
- spec/spec_helper.rb
|
98
|
+
- spec/support/mine_shaft_helpers.rb
|
99
|
+
- lib/mine_shaft/errors.rb
|
100
|
+
- lib/mine_shaft/html_table.rb
|
101
|
+
- lib/mine_shaft/login_page.rb
|
102
|
+
- lib/mine_shaft/shaft.rb
|
103
|
+
- lib/mine_shaft/user_agent.rb
|
104
|
+
- lib/mine_shaft/version.rb
|
105
|
+
- lib/mine_shaft/web_page.rb
|
106
|
+
- lib/mine_shaft.rb
|
107
|
+
- Rakefile
|
108
|
+
- README.textile
|
109
|
+
- CHANGELOG
|
110
|
+
- LICENSE
|
111
|
+
- spec/fixtures/failed_login.html
|
112
|
+
- spec/fixtures/home_page.html
|
113
|
+
- spec/fixtures/login.html
|
114
|
+
- spec/fixtures/multiple_tables.html
|
115
|
+
- spec/fixtures/projects_table.html
|
116
|
+
- spec/fixtures/wiki_page_with_multiple_tables.html
|
117
|
+
- spec/fixtures/wiki_page_with_no_table_id.html
|
118
|
+
- spec/fixtures/wiki_page_with_projects_table.html
|
119
|
+
- spec/spec.opts
|
120
|
+
has_rdoc: true
|
121
|
+
homepage: http://github.com/gn-research/mine_shaft
|
122
|
+
licenses: []
|
123
|
+
|
124
|
+
post_install_message:
|
125
|
+
rdoc_options: []
|
126
|
+
|
127
|
+
require_paths:
|
128
|
+
- lib
|
129
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
130
|
+
none: false
|
131
|
+
requirements:
|
132
|
+
- - ">="
|
133
|
+
- !ruby/object:Gem::Version
|
134
|
+
hash: 3
|
135
|
+
segments:
|
136
|
+
- 0
|
137
|
+
version: "0"
|
138
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
139
|
+
none: false
|
140
|
+
requirements:
|
141
|
+
- - ">="
|
142
|
+
- !ruby/object:Gem::Version
|
143
|
+
hash: 3
|
144
|
+
segments:
|
145
|
+
- 0
|
146
|
+
version: "0"
|
147
|
+
requirements: []
|
148
|
+
|
149
|
+
rubyforge_project: mine_shaft
|
150
|
+
rubygems_version: 1.3.7
|
151
|
+
signing_key:
|
152
|
+
specification_version: 3
|
153
|
+
summary: "Scrape & transform content from areas of a Redmine project which do not yet provide a REST API (ex: the wiki)"
|
154
|
+
test_files:
|
155
|
+
- spec/fixtures/failed_login.html
|
156
|
+
- spec/fixtures/home_page.html
|
157
|
+
- spec/fixtures/login.html
|
158
|
+
- spec/fixtures/multiple_tables.html
|
159
|
+
- spec/fixtures/projects_table.html
|
160
|
+
- spec/fixtures/wiki_page_with_multiple_tables.html
|
161
|
+
- spec/fixtures/wiki_page_with_no_table_id.html
|
162
|
+
- spec/fixtures/wiki_page_with_projects_table.html
|
163
|
+
- spec/mine_shaft/html_table_spec.rb
|
164
|
+
- spec/mine_shaft/shaft_spec.rb
|
165
|
+
- spec/mine_shaft/user_agent_spec.rb
|
166
|
+
- spec/mine_shaft/web_page_spec.rb
|
167
|
+
- spec/spec.opts
|
168
|
+
- spec/spec_helper.rb
|
169
|
+
- spec/support/mine_shaft_helpers.rb
|