mine_shaft 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +3 -0
- data/LICENSE +20 -0
- data/README.textile +93 -0
- data/Rakefile +9 -0
- data/lib/mine_shaft.rb +13 -0
- data/lib/mine_shaft/errors.rb +5 -0
- data/lib/mine_shaft/html_table.rb +101 -0
- data/lib/mine_shaft/login_page.rb +51 -0
- data/lib/mine_shaft/shaft.rb +49 -0
- data/lib/mine_shaft/user_agent.rb +109 -0
- data/lib/mine_shaft/version.rb +3 -0
- data/lib/mine_shaft/web_page.rb +31 -0
- data/spec/fixtures/failed_login.html +118 -0
- data/spec/fixtures/home_page.html +3 -0
- data/spec/fixtures/login.html +117 -0
- data/spec/fixtures/multiple_tables.html +212 -0
- data/spec/fixtures/projects_table.html +16 -0
- data/spec/fixtures/wiki_page_with_multiple_tables.html +193 -0
- data/spec/fixtures/wiki_page_with_no_table_id.html +176 -0
- data/spec/fixtures/wiki_page_with_projects_table.html +176 -0
- data/spec/mine_shaft/html_table_spec.rb +60 -0
- data/spec/mine_shaft/shaft_spec.rb +56 -0
- data/spec/mine_shaft/user_agent_spec.rb +62 -0
- data/spec/mine_shaft/web_page_spec.rb +25 -0
- data/spec/spec.opts +4 -0
- data/spec/spec_helper.rb +8 -0
- data/spec/support/mine_shaft_helpers.rb +9 -0
- metadata +169 -0
data/CHANGELOG
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2010 Tom Kersten, http://tomkersten.com/
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.textile
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
h1. MineShaft: Seed data from your stakeholders
|
2
|
+
|
3
|
+
h2. Overview
|
4
|
+
|
5
|
+
MineShaft uses @mechanize@ to walk through a Redmine project site and parse content
|
6
|
+
which can't be easily accessed via the REST API (yet). The gem also provides a
|
7
|
+
simple way to deserialize an HTML table into an Array of Hash objects which are
|
8
|
+
key-value pairs of the table heading(s) and the corresponding value for each row
|
9
|
+
in the table.
|
10
|
+
|
11
|
+
h2. Motivation
|
12
|
+
|
13
|
+
We were looking for some domain-specific information to work with in an
|
14
|
+
application and wanted to feed off of the knowledge of some of our stakeholders.
|
15
|
+
We didn't want to build out an admin interface which would be replaced with an
|
16
|
+
automated system down the road and having our users edit text files and send
|
17
|
+
them to us was not going to work. We created a @Seed_Data@ wiki page and threw
|
18
|
+
in a few tables with ID's and wrote MineShaft to parse those tables and convert
|
19
|
+
them into Hashes we could use to add/update content in the application database.
|
20
|
+
Using this method allows us to spread data entry & review requests across a
|
21
|
+
wide variety of people in an efficient manner.
|
22
|
+
|
23
|
+
h2. Usage
|
24
|
+
|
25
|
+
Assuming you have a Redmine installation set up at @http://your.rm-install.com@
|
26
|
+
and a project named @twitter4biz@ set up.
|
27
|
+
|
28
|
+
Given a wiki page titled "Seed_Data" with the following table definition
|
29
|
+
_somewhere_ in that page (note: the table ID must be present):
|
30
|
+
|
31
|
+
bc. table(#companies).
|
32
|
+
|Ticker|Name |
|
33
|
+
|AAPL |Apple |
|
34
|
+
|MSFT |Microsoft|
|
35
|
+
|GOOG |Google |
|
36
|
+
|YHOO |Yahoo |
|
37
|
+
|
38
|
+
Then in IRB (or whatever):
|
39
|
+
|
40
|
+
bc. require 'mine_shaft'
|
41
|
+
include MineShaft
|
42
|
+
shaft = Shaft.new('rm-username', 'rm-password', 'http://your.rm-install.com')
|
43
|
+
companies = shaft.grab("companies", '/projects/twitter4biz/wiki/Seed_Data')
|
44
|
+
=> [{:name => 'Apple', :ticker => 'AAPL'}, {:name => 'Microsoft', :ticker => 'MSFT'},...]
|
45
|
+
|
46
|
+
So, in the db/seeds.rb file of your Rails app, you could put something like the
|
47
|
+
following (assuming you have also included the previous code example):
|
48
|
+
|
49
|
+
bc. companies.each do |attributes|
|
50
|
+
company = Company.find_by_ticker(attributes[:ticker])
|
51
|
+
if company.nil?
|
52
|
+
Company.create!(attributes)
|
53
|
+
puts "Added '#{attributes[:ticker]}'"
|
54
|
+
else
|
55
|
+
ticker = attributes.delete(:ticker)
|
56
|
+
company.update(attributes)
|
57
|
+
puts "Updated '#{ticker}'"
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
...and then run:
|
62
|
+
|
63
|
+
bc. rake db:seed
|
64
|
+
|
65
|
+
h2. Installation
|
66
|
+
|
67
|
+
bc. gem install mine_shaft
|
68
|
+
|
69
|
+
h2. Contributing
|
70
|
+
|
71
|
+
# Fork it...
|
72
|
+
# bundle install
|
73
|
+
# ...make awesomeness
|
74
|
+
# Commit (ideally) to a feature-branch
|
75
|
+
# Send a pull request
|
76
|
+
|
77
|
+
h2. Found a bug?
|
78
|
+
|
79
|
+
File an issue on the project's "issues page":https://github.com/gn-research/mine_shaft/issues
|
80
|
+
|
81
|
+
h2. Dependencies
|
82
|
+
|
83
|
+
* mechanize
|
84
|
+
|
85
|
+
h2. License
|
86
|
+
|
87
|
+
Refer to LICENSE file (hint: MIT)
|
88
|
+
|
89
|
+
h2. Future Plans
|
90
|
+
|
91
|
+
The gem is meeting our needs at the moment, so we don't have any plans to add
|
92
|
+
significant functionality at the moment. However, it has come in quite handy
|
93
|
+
so far, so we may end up expanding it further if a new need arises.
|
data/Rakefile
ADDED
data/lib/mine_shaft.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# Redmine doesn't support XML auth to wiki pages...we need to get dirty w/ it
|
2
|
+
|
3
|
+
require 'mechanize'
|
4
|
+
require 'mine_shaft/errors'
|
5
|
+
require 'mine_shaft/html_table'
|
6
|
+
require 'mine_shaft/login_page'
|
7
|
+
require 'mine_shaft/shaft'
|
8
|
+
require 'mine_shaft/user_agent'
|
9
|
+
require 'mine_shaft/web_page'
|
10
|
+
|
11
|
+
# Top-level namespace for the gem. It mixes in Errors, which is presumably
# provided by the 'mine_shaft/errors' file required above (that file is not
# visible here -- confirm what it defines before relying on it).
module MineShaft
|
12
|
+
include Errors
|
13
|
+
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
module MineShaft
  # Provides several convenience methods for translating a (mechanize-) parsed
  # HTML table into standard Ruby data structures. All tables are assumed to
  # have a "heading" row as the first row, and that header uses <td> elements
  # (instead of <th>).
  class HTMLTable
    # Public: Initialize a new HTMLTable with the specified table-data as
    # parsed by mechanize (or Nokogiri).
    #
    # parsed_table - A Nokogiri::HTML::Document or Nokogiri::XML::Element scoped
    #                to only the HTML table you are interested in. Technically
    #                speaking, you could pass in more content than just the
    #                <table> element and it would likely work fine, but that is
    #                the anticipated content structure. The object only needs
    #                to answer #search("td") and #search("tr").
    #
    # Returns an instance of HTMLTable
    def initialize(parsed_table)
      @table = parsed_table
    end

    # Public: Retrieve the content of all the <td> elements from the table,
    # except for the first row.
    #
    # Returns an Array of Array elements, each one being the content from one
    # row of the table. The returned content does NOT include the first row,
    # as it is assumed to be the heading of the table. Returns an empty Array
    # when the table has no rows or no <td> cells.
    def content_rows
      cells = td_elements
      columns = columns_in(cells)
      # each_slice(0) raises ArgumentError, so bail out for empty tables.
      return [] if columns.zero?

      # Skip the heading row, then regroup the flat cell list row by row.
      cells.drop(columns).each_slice(columns).to_a
    end

    # Public: Converts HTML table to an Array of Hash objects, using the column
    # headings as keys for each Hash element.
    #
    # Examples
    #
    #   Given 'names' was initialized with the following table:
    #
    #     ---------------------
    #     |Name   |Number      |
    #     ---------------------
    #     |John   |123-456-7890|
    #     ---------------------
    #
    #   names.deserialize
    #   # => [{:name => "John", :number => "123-456-7890"}]
    #
    # Returns an Array of Hash objects. Each Hash element is a
    #   key-value mapping of "table header"-"row content". (Note that
    #   the key is a downcased-symbol of the heading value). When two columns
    #   share the same heading, the value of the first such column is kept.
    def deserialize
      # Compute the keys once instead of re-querying the table for every cell.
      keys = symbolized_headings

      content_rows.map do |row_cells|
        keys.each_with_index.each_with_object({}) do |(key, index), attributes|
          # First column wins for duplicate headings.
          attributes[key] = row_cells[index] unless attributes.key?(key)
        end
      end
    end

    # Public: Retrieves the content from all <td> elements in the table.
    #
    # Returns an Array of the content contained in each <td> element.
    def td_elements
      @table.search("td").map(&:content)
    end

    # Public: Retrieves the content from the <td> elements of the first row of
    # the table.
    #
    # Returns an Array of the content contained in each <td> element of the
    #   first row (empty when the table has no rows).
    def headings
      cells = td_elements
      cells.first(columns_in(cells))
    end
    alias :headers :headings

    # Public: Converts the return value of #headings to an Array of
    # lower-cased Symbol elements.
    #
    # Returns an Array of Symbol elements.
    def symbolized_headings
      headings.map { |header| header.downcase.to_sym }
    end

    private

    # Counts the number of columns in the table.
    #
    # Returns the number of columns (0 when the table has no rows).
    def column_count
      columns_in(td_elements)
    end

    # Derives the column count from an already-fetched list of cell contents,
    # so callers that hold the cells do not have to query the table again.
    #
    # cells - The Array returned by #td_elements.
    #
    # Returns the number of cells divided by the number of rows, or 0 when
    #   there are no rows (avoids a ZeroDivisionError).
    def columns_in(cells)
      rows = row_count
      rows.zero? ? 0 : cells.size / rows
    end

    # Counts the number of rows in the table.
    #
    # Returns the number of rows.
    def row_count
      @table.search("tr").count
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module MineShaft
  # Collection of methods applicable to the login page, essentially simplifying
  # the process of interacting with the login form for signing in.
  class LoginPage
    # The relative URL for the login page on a Redmine site
    LOGIN_FORM_ACTION = '/login'

    # Public: Instantiates a new LoginPage object.
    #
    # page - A parsed page object that exposes its forms via #forms
    #        (#login_form documents the forms as Mechanize::Form instances).
    #
    # Returns a new instance of a LoginPage.
    # Raises InvalidPage if the specified page does not contain the login form
    #   (including page objects that expose no forms at all).
    def initialize(page)
      @page = page
      raise InvalidPage, "Page specified does not appear to be the login page" unless login_page?
    end

    # Public: Confirms whether the specified page is the Redmine login page or
    # not. The check inspects the page directly rather than building an
    # instance and rescuing the resulting error.
    #
    # page - The page object to inspect.
    #
    # Returns true if the specified page is the login page.
    # Returns false if the specified page is not the login page.
    def self.valid?(page)
      !find_login_form(page).nil?
    end

    # Public: Locates the login form on an arbitrary page object.
    #
    # page - The page object to inspect.
    #
    # Returns the first form whose action equals LOGIN_FORM_ACTION.
    # Returns nil if no such form exists or the page does not respond to
    #   #forms (so a form-less response is not treated as a crash).
    def self.find_login_form(page)
      return nil unless page.respond_to?(:forms)

      page.forms.find { |form| form.action == LOGIN_FORM_ACTION }
    end

    # Public: Retrieves the login form from the page.
    #
    # Returns an instance of the Mechanize::Form class (cached once found).
    # Returns nil if the page has no login form.
    def login_form
      @login_form ||= self.class.find_login_form(@page)
    end

    private

    # Confirms whether the page is the login page or not
    #
    # Returns true if the login form was found
    # Returns false if the login form was not found
    def login_page?
      !login_form.nil?
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module MineShaft
  # Provides simple interface for deserializing an id'd HTML table from a
  # specific page of a Redmine project site.
  class Shaft
    # The relative URL for the login page on a Redmine site
    LOGIN_PAGE_URL = "/login"

    # Public: Initializes new instance of Shaft class.
    #
    # username - The username to log in with on the specified Redmine site.
    # password - The password to log in with on the specified Redmine site.
    # base_uri - The URL of the Redmine installation.
    #
    # Examples
    #
    #   shaft = Shaft.new('uname', 'password', 'http://myredmineinstall.com')
    #
    # Returns a new instance of the Shaft class.
    def initialize(username, password, base_uri)
      # All site interaction is delegated to the UserAgent built here.
      @agent = UserAgent.new(username, password, base_uri)
    end

    # Public: Logs in and parses the specified page for a <table> with the
    # specified ID.
    #
    # table_id               - The HTML id of the desired table as a String.
    # relative_wiki_page_url - The relative URL of the page within the Redmine
    #                          site.
    #
    # Examples
    #
    #   shaft.grab('names', '/projects/name-parser/Wiki/Name_Data')
    #
    # Returns an Array of Hash objects. Each Hash element is a
    #   key-value mapping of "table header"-"row content". (Note that
    #   the key is a downcased-symbol of the heading value).
    # Raises FailedLogin if the login failed
    # Raises InvalidPage if the login page of the site renders a 404
    #                    OR if a table with the supplied ID is not found on the
    #                    specified page.
    def grab(table_id, relative_wiki_page_url)
      @agent.log_in
      wiki_page = WebPage.new(@agent.get(relative_wiki_page_url))
      wiki_page.find_table(table_id).deserialize
    end
  end
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
module MineShaft
  # Acts as a headless browser to log in and interact with a Redmine site.
  class UserAgent
    # The relative URL to hit for the login page on a Redmine site.
    LOGIN_ACTION = '/login'

    # Public: Creates a new instance of the UserAgent class.
    #
    # username - The username to use for logging into the Redmine site.
    # password - The password to use for logging into the Redmine site.
    # base_uri - The URL of the Redmine site.
    #
    # Returns a new instance of the UserAgent class.
    def initialize(username, password, base_uri)
      @username  = username
      @password  = password
      @base_uri  = base_uri
      @agent     = Mechanize.new
      @logged_in = false
    end

    # Public: Retrieves the specified page from the Redmine site.
    #
    # page - The relative URL of the page to retrieve as a String. It is
    #        appended verbatim to the base URI.
    #
    # Returns the page parsed into a Mechanize::Page object.
    def get(page)
      @current_page = @agent.get("#{@base_uri}#{page}")
    end

    # Public: Logs into the Redmine site using credentials specified on object
    # instantiation. Does nothing if a previous login already succeeded.
    #
    # Returns true if login process was successful.
    # Raises FailedLogin if the login was not successful.
    # Raises InvalidPage if the site answers with an HTTP error status (for
    #   example a 404); the message reports the status code received.
    def log_in
      return true if logged_in?

      fill_out_login_form
      submit(login_form, login_form.buttons.first)

      # Landing on the login page again means the credentials were rejected.
      raise FailedLogin, "Login failed. Please verify username & password" if back_on_login_page?

      @logged_in = true
    rescue Mechanize::ResponseCodeError => error
      # Report the status actually received rather than always claiming 404.
      raise InvalidPage, "'#{@base_uri}' is returning a #{error.response_code}. Please verify the URL is a functioning Redmine installation"
    end

    # Public: Submits the specified form by clicking the specified button on
    # said form.
    #
    # form   - A Mechanize::Form object.
    # button - A Mechanize::Form::Submit object.
    #
    # Returns the page resulting from the submission process as a
    #   Mechanize::Page object.
    def submit(form, button)
      @current_page = @agent.submit(form, button)
    end

    # Public: Confirms whether or not the UserAgent instance is logged in.
    #
    # Returns true if the UserAgent is currently logged in.
    # Returns false if the UserAgent is not currently logged in.
    def logged_in?
      @logged_in
    end

    private

    # Enters the username & password specified during instantiation into the
    # username & password text fields of the login form.
    #
    # Returns nothing.
    def fill_out_login_form
      form = login_form
      form.username = @username
      form.password = @password
      nil
    end

    # Convenience method to retrieve the login form from the Redmine
    # installation's login page.
    #
    # Returns the login form (caches form object, so you can interact with the
    #   return value directly on the method).
    def login_form
      @login_form ||= load_login_page.login_form
    end

    # Retrieves and caches the login page of the Redmine installation.
    #
    # Returns a LoginPage object (object is cached, so you can interact with
    #   the return value directly on the method).
    def load_login_page
      @login_page ||= LoginPage.new(get(LOGIN_ACTION))
    end

    # Confirms whether the current page is the login page...used to detect if
    # a login failed.
    #
    # Returns true if the current page is the Redmine installation's login
    #   page
    # Returns false if the current page is not the Redmine installation's login
    #   page
    def back_on_login_page?
      LoginPage.valid?(@current_page)
    end
  end
end
|