mine_shaft 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,60 @@
1
+ require 'spec_helper'
2
+ require 'mine_shaft'
3
+ include MineShaft
4
+
5
# Exercises HTMLTable against the parsed 'projects_table.html' fixture.
# The expectations assume that fixture holds a heading row (Name / Owner)
# followed by two content rows.
describe HTMLTable do
  let(:raw_table) do
    fixture_path = fixture('projects_table.html')
    Nokogiri::HTML(File.read(fixture_path))
  end

  let(:table) { HTMLTable.new(raw_table) }

  describe "#deserialize" do
    let(:records) { table.deserialize }
    let(:first_record) { records.first }

    it "returns a Hash with the same number of elements as there are content rows (non-headings)" do
      records.should have(2).items
    end

    it "downcases & symbolizes the headings and uses them as the Hash keys" do
      heading_keys = first_record.keys
      heading_keys.should have(2).items
      [:name, :owner].each { |key| heading_keys.should include(key) }
    end

    it "maps the associated value for each column to the heading hash key" do
      first_record[:name].should == "Sunbeam"
      first_record[:owner].should == "John Doe"
    end
  end

  describe ".content_rows" do
    let(:rows) { table.content_rows }

    it "returns a array with the same number of rows as the table (minus the heading)" do
      rows.should have(2).items
    end

    it "does not include the content from the header" do
      cells = rows.flatten
      cells.should_not include("Name")
      cells.should_not include("Owner")
    end

    it "does include the <td> content for non-header rows" do
      rows.should include(["Sunbeam", "John Doe"])
      rows.should include(["Truby", "Jane Doe"])
    end
  end

  describe ".td_elements" do
    it "returns an array of all the <td> elements in the table" do
      table.should have(6).td_elements
    end

    it "returns the textual content in the <td> elements" do
      cell_texts = table.td_elements
      ["Sunbeam", "Name", "John Doe"].each { |text| cell_texts.should include(text) }
    end
  end

  describe ".headings" do
    it "assumes the first row of <td>'s in the table is the header" do
      table.headings.should == ["Name", "Owner"]
    end
  end
end
@@ -0,0 +1,56 @@
1
+ require 'spec_helper'
2
+ require "mine_shaft"
3
+
4
+ include MineShaft
5
+
6
# Specs for Shaft#grab. Logging in is stubbed out on the UserAgent, and the
# wiki page is served by FakeWeb from HTML fixture files.
describe Shaft do
  let(:base_uri) { 'http://redmine.example.com' }
  let(:shaft) { Shaft.new('username', 'password', base_uri) }

  before(:each) do
    # Build a real agent, neutralise its log_in, and make UserAgent.new return
    # that instance for exactly these arguments (presumably the call Shaft
    # makes internally -- confirm against lib/mine_shaft/shaft.rb).
    agent = UserAgent.new('username', 'password', base_uri)
    agent.stub(:log_in).and_return(true)
    UserAgent.stub(:new).with('username', 'password', base_uri).and_return(agent)
  end

  describe ".grab" do
    let(:wiki_page) { '/projects/test/wiki/Wiki' }

    # Registers the named fixture file as the body of a GET to the wiki page.
    def serve_wiki_page(fixture_name)
      FakeWeb.register_uri(:get,
                           "#{base_uri}#{wiki_page}",
                           :body => fixture(fixture_name),
                           :content_type => 'text/html')
    end

    context "when the specified table id does not exist on the page" do
      before(:each) { serve_wiki_page('wiki_page_with_no_table_id.html') }

      it "raises a InvalidPage exception" do
        # `to` is the documented counterpart of a block-form `expect`.
        expect {
          shaft.grab('my-table-id', wiki_page)
        }.to raise_error(InvalidPage)
      end
    end

    # This description used to repeat the "does not exist" wording of the
    # context above, although the fixture here does contain the table.
    context "when the specified table exists on the page" do
      before(:each) { serve_wiki_page('wiki_page_with_projects_table.html') }

      it "returns an array with the same number of elements as are in the table (minus header)" do
        shaft.grab('projects', wiki_page).should have(2).items
      end

      it "returns elements with key/value pairs of heading-name/row-value for each row" do
        results = shaft.grab('projects', wiki_page)
        results.first[:name].should == "Sunbeam"
        results.first[:owner].should == "John Doe"
        results.last[:name].should == "Truby"
        results.last[:owner].should == "Jane Doe"
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'spec_helper'
2
+ require 'mine_shaft'
3
+ include MineShaft
4
+
5
# Specs for UserAgent#log_in. All HTTP traffic is faked with FakeWeb; each
# context supplies a `response` hash describing what the server returns.
describe UserAgent do
  let(:base_uri) { 'http://redmine.example.com' }
  let(:login_page) { '/login' }
  let(:agent) { UserAgent.new('username', 'password', base_uri) }

  describe ".log_in" do
    # Outermost hook: the context's `response` answers a GET of the login page.
    before(:each) do
      FakeWeb.register_uri(:get, "#{base_uri}#{login_page}", response)
    end

    context "when it receives a 404 response code" do
      let(:response) { {:body => "<html></html>", :content_type => 'text/html', :status => [404, "Not Found"]} }

      it "raises an InvalidPage error" do
        # `to` is the documented counterpart of a block-form `expect`.
        expect { agent.log_in }.to raise_error(InvalidPage)
      end
    end

    context "when it does not find the login form" do
      let(:response) { {:body => "<html></html>", :content_type => 'text/html'} }

      it "raises a InvalidPage exception" do
        expect { agent.log_in }.to raise_error(InvalidPage)
      end
    end

    context "when it finds the login form" do
      # Runs after the outer hook and registers the same GET URI again, this
      # time with the login form fixture. In the nested contexts `response`
      # is used as the reply to the POST of that form instead.
      before(:each) do
        login_response = {:body => fixture('login.html'), :content_type => 'text/html'}
        FakeWeb.register_uri(:get, "#{base_uri}#{login_page}", login_response)
        FakeWeb.register_uri(:post, "#{base_uri}#{login_page}", response)
      end

      context "and a successful login occurs" do
        let(:response) { {:body => fixture('home_page.html'), :content_type => 'text/html'} }

        it "returns true" do
          agent.log_in.should be_true
        end
      end

      context "and the login fails (likely due to username/password)" do
        let(:response) { {:body => fixture('failed_login.html'), :content_type => 'text/html'} }

        it "raises a FailedLogin exception" do
          expect { agent.log_in }.to raise_error(FailedLogin)
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ require 'spec_helper'
2
+ require 'mine_shaft'
3
+ include MineShaft
4
+
5
# Specs for WebPage#find_table, run against Nokogiri-parsed HTML fixtures.
describe WebPage do
  describe ".find_table" do
    let(:page) { WebPage.new(parsed_page) }

    # Each context defines `parsed_page` from the fixture it needs.
    context "when the specified table id does not exist on the page" do
      let(:parsed_page) do
        Nokogiri::HTML(File.read(fixture('wiki_page_with_no_table_id.html')))
      end

      it "raises a InvalidPage exception" do
        # `to` is the documented counterpart of a block-form `expect`.
        expect { page.find_table('my-table-id') }.to raise_error(InvalidPage)
      end
    end

    context "when the specified table exists on the page" do
      let(:parsed_page) do
        Nokogiri::HTML(File.read(fixture('wiki_page_with_projects_table.html')))
      end

      it "returns an HTMLTable" do
        page.find_table('projects').should be_instance_of(HTMLTable)
      end
    end
  end
end
data/spec/spec.opts ADDED
@@ -0,0 +1,4 @@
1
+ --colour
2
+ --format
3
+ profile
4
+ --diff
@@ -0,0 +1,8 @@
1
# Shared RSpec bootstrap: loads the library, FakeWeb and every support file,
# then mixes the fixture helpers into all example groups.
#
# Paths are resolved from this file instead of the working directory, so
# loading no longer depends on "." being on $LOAD_PATH (it is not on
# Ruby >= 1.9.2).
# NOTE(review): assumes this file is spec/spec_helper.rb, i.e. lib/ is a
# sibling of the directory containing it -- confirm.
spec_root = File.expand_path(File.dirname(__FILE__))

require File.expand_path('../lib/mine_shaft', spec_root)
require 'fakeweb'

# Sorted so the load order does not depend on the filesystem.
Dir.glob(File.join(spec_root, 'support', '**', '*.rb')).sort.each { |f| require f }

RSpec.configure do |config|
  config.include MineShaftHelpers
end
@@ -0,0 +1,9 @@
1
# Helpers shared by the specs.
module MineShaftHelpers
  # Resolves a fixture file name to its absolute path.
  #
  # The name is looked up under "spec/fixtures", relative to the current
  # working directory.
  #
  # @param filename [String] file name inside spec/fixtures
  # @return [String] absolute path of the fixture file
  # @raise [RuntimeError] when no such file exists
  def fixture(filename)
    filepath = File.expand_path("spec/fixtures/#{filename}")
    # File.exist? is the supported spelling; File.exists? was deprecated and
    # has been removed in Ruby 3.2.
    raise "Fixture file not found" unless File.exist?(filepath)
    filepath
  end
end

# Also make the helpers callable from the top level.
include MineShaftHelpers
metadata ADDED
@@ -0,0 +1,169 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mine_shaft
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Tom Kersten
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-11-27 00:00:00 -06:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: mechanize
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ hash: 23
30
+ segments:
31
+ - 1
32
+ - 0
33
+ - 0
34
+ version: 1.0.0
35
+ type: :runtime
36
+ version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ name: rake
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ hash: 3
46
+ segments:
47
+ - 0
48
+ version: "0"
49
+ type: :development
50
+ version_requirements: *id002
51
+ - !ruby/object:Gem::Dependency
52
+ name: rspec
53
+ prerelease: false
54
+ requirement: &id003 !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ~>
58
+ - !ruby/object:Gem::Version
59
+ hash: 11
60
+ segments:
61
+ - 2
62
+ - 1
63
+ - 0
64
+ version: 2.1.0
65
+ type: :development
66
+ version_requirements: *id003
67
+ - !ruby/object:Gem::Dependency
68
+ name: fakeweb
69
+ prerelease: false
70
+ requirement: &id004 !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ hash: 27
76
+ segments:
77
+ - 1
78
+ - 3
79
+ - 0
80
+ version: 1.3.0
81
+ type: :development
82
+ version_requirements: *id004
83
+ description: Provides an easy way to authenticate and grab content from a Redmine project. A specific use-case was to deserialize HTML tables from the wiki into an Array of Hashes with key/value pairs of "table heading" -> "column value" in order to simplify collection of seed data from domain experts.
84
+ email:
85
+ - tkersten@gnresound.com
86
+ executables: []
87
+
88
+ extensions: []
89
+
90
+ extra_rdoc_files: []
91
+
92
+ files:
93
+ - spec/mine_shaft/html_table_spec.rb
94
+ - spec/mine_shaft/shaft_spec.rb
95
+ - spec/mine_shaft/user_agent_spec.rb
96
+ - spec/mine_shaft/web_page_spec.rb
97
+ - spec/spec_helper.rb
98
+ - spec/support/mine_shaft_helpers.rb
99
+ - lib/mine_shaft/errors.rb
100
+ - lib/mine_shaft/html_table.rb
101
+ - lib/mine_shaft/login_page.rb
102
+ - lib/mine_shaft/shaft.rb
103
+ - lib/mine_shaft/user_agent.rb
104
+ - lib/mine_shaft/version.rb
105
+ - lib/mine_shaft/web_page.rb
106
+ - lib/mine_shaft.rb
107
+ - Rakefile
108
+ - README.textile
109
+ - CHANGELOG
110
+ - LICENSE
111
+ - spec/fixtures/failed_login.html
112
+ - spec/fixtures/home_page.html
113
+ - spec/fixtures/login.html
114
+ - spec/fixtures/multiple_tables.html
115
+ - spec/fixtures/projects_table.html
116
+ - spec/fixtures/wiki_page_with_multiple_tables.html
117
+ - spec/fixtures/wiki_page_with_no_table_id.html
118
+ - spec/fixtures/wiki_page_with_projects_table.html
119
+ - spec/spec.opts
120
+ has_rdoc: true
121
+ homepage: http://github.com/gn-research/mine_shaft
122
+ licenses: []
123
+
124
+ post_install_message:
125
+ rdoc_options: []
126
+
127
+ require_paths:
128
+ - lib
129
+ required_ruby_version: !ruby/object:Gem::Requirement
130
+ none: false
131
+ requirements:
132
+ - - ">="
133
+ - !ruby/object:Gem::Version
134
+ hash: 3
135
+ segments:
136
+ - 0
137
+ version: "0"
138
+ required_rubygems_version: !ruby/object:Gem::Requirement
139
+ none: false
140
+ requirements:
141
+ - - ">="
142
+ - !ruby/object:Gem::Version
143
+ hash: 3
144
+ segments:
145
+ - 0
146
+ version: "0"
147
+ requirements: []
148
+
149
+ rubyforge_project: mine_shaft
150
+ rubygems_version: 1.3.7
151
+ signing_key:
152
+ specification_version: 3
153
+ summary: "Scrape & transform content from areas of a Redmine project which do not yet provide a REST API (ex: the wiki)"
154
+ test_files:
155
+ - spec/fixtures/failed_login.html
156
+ - spec/fixtures/home_page.html
157
+ - spec/fixtures/login.html
158
+ - spec/fixtures/multiple_tables.html
159
+ - spec/fixtures/projects_table.html
160
+ - spec/fixtures/wiki_page_with_multiple_tables.html
161
+ - spec/fixtures/wiki_page_with_no_table_id.html
162
+ - spec/fixtures/wiki_page_with_projects_table.html
163
+ - spec/mine_shaft/html_table_spec.rb
164
+ - spec/mine_shaft/shaft_spec.rb
165
+ - spec/mine_shaft/user_agent_spec.rb
166
+ - spec/mine_shaft/web_page_spec.rb
167
+ - spec/spec.opts
168
+ - spec/spec_helper.rb
169
+ - spec/support/mine_shaft_helpers.rb