wiki-api 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: !binary |-
4
+ NjgxOGUxZjQ2MWQ2MjNhMDA2ZGUwMTRhOGI4MWFlOGQ3MzI4MWFjOA==
5
+ data.tar.gz: !binary |-
6
+ ZmZkNDFhMzc0ZTNmZDBlYTFmMTIwMmU5ZDgzYTQ2YjM0ZTk1ZmQzYg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ NGM4YTU2MjQ3Njk1MzJkMDhlYjcxODYxNDFkNzRlODI5MjMwNmU5ZGEzZmJj
10
+ MjhjZjYxYzcxMmYzYjA0YzA3NzdlYTJhMjM0ZTllNzgyMDk0MGJiNjBiZWRl
11
+ N2Y5YzMwZWZjZmY3NWQ0YmJiMjdiOTkwOTU1ZmE4MDg5Njk4M2Y=
12
+ data.tar.gz: !binary |-
13
+ MGZlMTYzZTgzZWE3YmYzZmIyMjc0OTZhMGY0NDEwYzJmNmFiMTZkNDM3OGM2
14
+ Mjc1MDdjMzQ3MjM1NmVlODM3Mzg5ZTViMGRmOGI2NzE1NDZjODJhZTA2MjI5
15
+ NWE3YmI4MDYxY2I4NGM3MGUwNzAzNjQ3YjMwODU5NDBlMWYxZDM=
data/README.md CHANGED
@@ -17,17 +17,21 @@ Requests to the MediaWiki API use the following URI structure:
17
17
 
18
18
  ### Roadmap
19
19
 
20
- * Version (0.0.1) (current)
21
-
22
- Initial project.
23
-
24
- * Version (0.0.2)
20
+ * Version (0.0.2) (current)
25
21
 
26
22
  Index important words per block, page, list item;
27
23
 
28
24
  Parse objects for more elements within a Page.
29
25
 
30
26
 
27
+ ### Changelog
28
+
29
+ * Version (0.0.1) -> (0.0.2)
30
+
31
+ Nested ListItems, Links (within Page)
32
+
33
+ Search on Page headline (case-insensitive; spaces are treated as underscores)
34
+
31
35
 
32
36
  ### Known Issues
33
37
 
@@ -132,7 +136,7 @@ end
132
136
  ```
133
137
 
134
138
 
135
- ### Example (https://en.wikipedia.org/wiki/Ruby_on_rails)
139
+ ### Example using Global config (https://en.wikipedia.org/wiki/Ruby_on_rails)
136
140
 
137
141
  This is an example of querying wikipedia.org for the page "Ruby_on_rails" and printing the links of each list item under the References headline.
138
142
 
@@ -161,6 +165,28 @@ end
161
165
 
162
166
 
163
167
 
168
+ ### Example passing URI (https://en.wikipedia.org/wiki/Ruby_on_rails)
169
+
170
+ This is the same example as the one above, except that the URI is passed directly to the page instead of being set through a global config.
171
+
172
+ ```ruby
173
+ # querying the page
174
+ page = Wiki::Api::Page.new name: "Ruby_on_rails", uri: "https://en.wikipedia.org"
175
+
176
+ # get headlines with the name "References" (there can be multiple headlines with the same name!)
177
+ headlines = page.headline "References"
178
+
179
+ # iterate headlines
180
+ headlines.each do |headline|
181
+ # iterate list items on the given headline
182
+ headline.block.list_items.each do |list_item|
183
+
184
+ # print the uri of all links
185
+ puts list_item.links.map{ |l| l.uri }
186
+
187
+ end
188
+ end
189
+ ```
164
190
 
165
191
 
166
192
 
@@ -10,6 +10,7 @@ module Wiki
10
10
  attr_accessor :uri, :api_path, :api_options, :http, :request, :response, :html, :parsed
11
11
 
12
12
  def initialize(options={})
13
+ @@config ||= nil
13
14
  options.merge! @@config unless @@config.nil?
14
15
  self.uri = options[:uri] if options.include? :uri
15
16
  self.api_path = options[:api_path] if options.include? :api_path
data/lib/wiki/api/page.rb CHANGED
@@ -3,14 +3,16 @@ module Wiki
3
3
 
4
4
  class Page
5
5
 
6
- attr_accessor :name, :parsed_page
6
+ attr_accessor :name, :parsed_page, :uri
7
7
 
8
8
  def initialize(options={})
9
9
  self.name = options[:name] if options.include? :name
10
+ uri = options[:uri] if options.include? :uri
11
+
10
12
  @@config ||= nil
11
- if @@config.nil?
13
+ if @@config.nil? || !uri.nil?
12
14
  # use the connection to collect HTML pages for parsing
13
- @connect = Wiki::Api::Connect.new
15
+ @connect = Wiki::Api::Connect.new uri: uri
14
16
  else
15
17
  # using a local HTML file for parsing
16
18
  end
@@ -79,8 +81,8 @@ module Wiki
79
81
 
80
82
  # get headline nodes by span class
81
83
  xs = self.parsed_page.xpath("//span[@class='mw-headline']")
82
- # filter single headline by name
83
- xs = xs.reject{|t| t.attributes["id"].value != headline_name } unless headline_name.nil?
84
+ # filter single headline by name (ignore case)
85
+ xs = self.filter_headline xs, headline_name unless headline_name.nil?
84
86
 
85
87
  # NOTE: first_part has no id attribute and thus cannot be filtered or processed within xpath (xs)
86
88
  if headline_name == self.name || headline_name.nil?
@@ -119,6 +121,14 @@ module Wiki
119
121
  elements
120
122
  end
121
123
 
124
+ def filter_headline xs, headline_name
125
+ # transform name to a wiki_id (downcase and space replace with underscore)
126
+ headline_name = headline_name.downcase.gsub(" ", "_")
127
+ # reject not matching id's
128
+ xs.reject do |t|
129
+ !t.attributes["id"].value.downcase.start_with?(headline_name)
130
+ end
131
+ end
122
132
 
123
133
  end
124
134
 
data/lib/wiki/api/util.rb CHANGED
@@ -22,6 +22,7 @@ module Wiki
22
22
  result.map{|k,v| v.join("")}
23
23
  end
24
24
 
25
+
25
26
  protected
26
27
  def clean_text text
27
28
  text.gsub(/\n/, " ").squeeze(" ").gsub(/\s(\W)/, '\1').gsub(/(\W)\s/, '\1 ').strip
@@ -1,5 +1,5 @@
1
1
  module Wiki
2
2
  module Api
3
- VERSION = "0.0.1"
3
+ VERSION = "0.0.2"
4
4
  end
5
5
  end
@@ -0,0 +1,45 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'test/unit'
5
+ require File.expand_path(File.dirname(__FILE__) + "/../../lib/wiki/api")
6
+
7
+ #
8
+ # Testing the parsing of URI by passing a uri variable to Page:
9
+ # https://en.wiktionary.org/wiki/Wiktionary:Welcome,_newcomers
10
+ #
11
+
12
+ class WikiPageConfig < Test::Unit::TestCase
13
+
14
+ def setup
15
+ # NOTE: comment Page.config, to use the online MediaWiki instance
16
+ # Wiki::Api::Page.config = PAGE_CONFIG
17
+ # Wiki::Api::Connect.config = GLB_CONFIG
18
+ # @page_name = "Wiktionary:Welcome,_newcomers"
19
+ end
20
+
21
+ def teardown
22
+ end
23
+
24
+ # test simple page invocation
25
+ def test_page_invocation_with_uri
26
+ page = Wiki::Api::Page.new name: "Wiktionary:Welcome,_newcomers", uri: "http://en.wiktionary.org"
27
+ headlines = page.headlines
28
+ assert !headlines.empty?, "expected headlines"
29
+ assert headlines.size < 1, "expected more than one headline"
30
+ headlines.each do |headline|
31
+ assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
32
+ end
33
+ end
34
+
35
+ def test_wrong_page_invocation_with_uri
36
+ begin
37
+ page = Wiki::Api::Page.new name: "A_Wrong_Page_Name", uri: "http://en.wiktionary.org"
38
+ assert false, "expected a failiure"
39
+ rescue Exception => e
40
+ assert true, "expected a failiure"
41
+ end
42
+ end
43
+
44
+ end
45
+
@@ -45,6 +45,7 @@ class WikiPageObject < Test::Unit::TestCase
45
45
  page = Wiki::Api::Page.new name: @page_name
46
46
  headlines = page.headlines
47
47
  assert !headlines.empty?, "expected headlines"
48
+ assert headlines.size > 1, "expected more than one headline"
48
49
  headlines.each do |headline|
49
50
  assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
50
51
  elements = headline.elements.flatten
@@ -62,6 +63,7 @@ class WikiPageObject < Test::Unit::TestCase
62
63
  page = Wiki::Api::Page.new name: @page_name
63
64
  headlines = page.headlines
64
65
  assert !headlines.empty?, "expected headlines"
66
+ assert headlines.size > 1, "expected more than one headline"
65
67
  headlines.each do |headline|
66
68
  assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
67
69
  block = headline.block
@@ -74,6 +76,7 @@ class WikiPageObject < Test::Unit::TestCase
74
76
  page = Wiki::Api::Page.new name: @page_name
75
77
  headlines = page.headlines
76
78
  assert !headlines.empty?, "expected headlines"
79
+ assert headlines.size > 1, "expected more than one headline"
77
80
  headlines.each do |headline|
78
81
  assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
79
82
  block = headline.block
@@ -91,6 +94,7 @@ class WikiPageObject < Test::Unit::TestCase
91
94
  page = Wiki::Api::Page.new name: @page_name
92
95
  headlines = page.headlines
93
96
  assert !headlines.empty?, "expected headlines"
97
+ assert headlines.size > 1, "expected more than one headline"
94
98
  headlines.each do |headline|
95
99
  assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
96
100
  block = headline.block
@@ -108,6 +112,7 @@ class WikiPageObject < Test::Unit::TestCase
108
112
  page = Wiki::Api::Page.new name: @page_name
109
113
  headlines = page.headlines
110
114
  assert !headlines.empty?, "expected headlines"
115
+ assert headlines.size > 1, "expected more than one headline"
111
116
  headlines.each do |headline|
112
117
  assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
113
118
  block = headline.block
@@ -126,6 +131,7 @@ class WikiPageObject < Test::Unit::TestCase
126
131
  page = Wiki::Api::Page.new name: @page_name
127
132
  headlines = page.headlines
128
133
  assert !headlines.empty?, "expected headlines"
134
+ assert headlines.size > 1, "expected more than one headline"
129
135
  headlines.each do |headline|
130
136
  assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
131
137
  block = headline.block
@@ -148,6 +154,7 @@ class WikiPageObject < Test::Unit::TestCase
148
154
  page = Wiki::Api::Page.new name: @page_name
149
155
  headlines = page.headlines
150
156
  assert !headlines.empty?, "expected headlines"
157
+ assert headlines.size > 1, "expected more than one headline"
151
158
 
152
159
  # collect headline names
153
160
  hs = []
@@ -162,10 +169,61 @@ class WikiPageObject < Test::Unit::TestCase
162
169
  headlines = page.headline h
163
170
  # test for at least one (many indicates multiple headlines with the same name)
164
171
  assert !headlines.empty?, "expected a list of headlines"
172
+ assert headlines.size == 1, "expected one headline"
165
173
  headlines.each do |headline|
166
174
  assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
167
175
  end
168
176
  end
169
177
  end
170
178
 
179
+ def test_page_headline_search_downcase
180
+ page = Wiki::Api::Page.new name: @page_name
181
+
182
+ headlines = page.headline "Editing_Wiktionary"
183
+ assert !headlines.empty?, "expected headlines"
184
+ assert headlines.size == 1, "expected one headline"
185
+
186
+
187
+ # iterate headlines
188
+ headlines.each do |headline|
189
+ assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
190
+ end
191
+
192
+ # search downcase
193
+ headlines = page.headline "editing_wiktionary"
194
+ assert !headlines.empty?, "expected headlines"
195
+
196
+ # iterate headlines
197
+ headlines.each do |headline|
198
+ assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
199
+ end
200
+
201
+ end
202
+
203
+ def test_page_headline_search_regular
204
+ page = Wiki::Api::Page.new name: @page_name
205
+
206
+ headlines = page.headline "Editing_Wiktionary"
207
+ assert !headlines.empty?, "expected headlines"
208
+ assert headlines.size == 1, "expected one headline"
209
+
210
+ # iterate headlines
211
+ headlines.each do |headline|
212
+ assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
213
+ end
214
+
215
+ # search downcase
216
+ headlines = page.headline "editing_wiktionary"
217
+ assert headlines.size == 1, "expected one headline"
218
+
219
+ # search downcase with spaces
220
+ headlines = page.headline "editing wiktionary"
221
+ assert headlines.size == 1, "expected one headline"
222
+
223
+ # search idiot case with spaces
224
+ headlines = page.headline "eDiTinG wiKtiOnarY"
225
+ assert headlines.size == 1, "expected one headline"
226
+
227
+ end
228
+
171
229
  end
data/wiki-api.gemspec CHANGED
@@ -8,9 +8,9 @@ Gem::Specification.new do |spec|
8
8
  spec.version = Wiki::Api::VERSION
9
9
  spec.authors = ["Dennis Blommesteijn"]
10
10
  spec.email = ["dennis@blommesteijn.com"]
11
- spec.description = %q{MediaWiki API and parser}
12
- spec.summary = %q{MediaWiki API and parser}
13
- spec.homepage = ""
11
+ spec.description = %q{MediaWiki API and Page content parser for Headlines, Blocks, Elements, ListItems, and Links.}
12
+ spec.summary = %q{MediaWiki API and Page content parser for Headlines, Blocks, Elements, ListItems, and Links.}
13
+ spec.homepage = "https://github.com/dblommesteijn/wiki-api"
14
14
  spec.license = "MIT"
15
15
 
16
16
  spec.files = `git ls-files`.split($/)
@@ -22,8 +22,8 @@ Gem::Specification.new do |spec|
22
22
  spec.add_development_dependency "rake"
23
23
 
24
24
  # dependencies
25
- spec.add_dependency 'nokogiri'
26
- spec.add_dependency 'json'
27
- spec.add_development_dependency "test-unit"
25
+ spec.add_dependency 'nokogiri', "~> 1.5.0"
26
+ spec.add_dependency 'json', "~> 1.6.1"
27
+ spec.add_development_dependency "test-unit", "~> 2.0.0"
28
28
 
29
29
  end
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wiki-api
3
3
  version: !ruby/object:Gem::Version
4
- prerelease:
5
- version: 0.0.1
4
+ version: 0.0.2
6
5
  platform: ruby
7
6
  authors:
8
7
  - Dennis Blommesteijn
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-03-28 00:00:00.000000000 Z
11
+ date: 2013-04-03 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: bundler
@@ -18,13 +17,11 @@ dependencies:
18
17
  - - "~>"
19
18
  - !ruby/object:Gem::Version
20
19
  version: '1.3'
21
- none: false
22
20
  requirement: !ruby/object:Gem::Requirement
23
21
  requirements:
24
22
  - - "~>"
25
23
  - !ruby/object:Gem::Version
26
24
  version: '1.3'
27
- none: false
28
25
  prerelease: false
29
26
  type: :development
30
27
  - !ruby/object:Gem::Dependency
@@ -33,73 +30,57 @@ dependencies:
33
30
  requirements:
34
31
  - - ">="
35
32
  - !ruby/object:Gem::Version
36
- version: !binary |-
37
- MA==
38
- none: false
33
+ version: '0'
39
34
  requirement: !ruby/object:Gem::Requirement
40
35
  requirements:
41
36
  - - ">="
42
37
  - !ruby/object:Gem::Version
43
- version: !binary |-
44
- MA==
45
- none: false
38
+ version: '0'
46
39
  prerelease: false
47
40
  type: :development
48
41
  - !ruby/object:Gem::Dependency
49
42
  name: nokogiri
50
43
  version_requirements: !ruby/object:Gem::Requirement
51
44
  requirements:
52
- - - ">="
45
+ - - "~>"
53
46
  - !ruby/object:Gem::Version
54
- version: !binary |-
55
- MA==
56
- none: false
47
+ version: 1.5.0
57
48
  requirement: !ruby/object:Gem::Requirement
58
49
  requirements:
59
- - - ">="
50
+ - - "~>"
60
51
  - !ruby/object:Gem::Version
61
- version: !binary |-
62
- MA==
63
- none: false
52
+ version: 1.5.0
64
53
  prerelease: false
65
54
  type: :runtime
66
55
  - !ruby/object:Gem::Dependency
67
56
  name: json
68
57
  version_requirements: !ruby/object:Gem::Requirement
69
58
  requirements:
70
- - - ">="
59
+ - - "~>"
71
60
  - !ruby/object:Gem::Version
72
- version: !binary |-
73
- MA==
74
- none: false
61
+ version: 1.6.1
75
62
  requirement: !ruby/object:Gem::Requirement
76
63
  requirements:
77
- - - ">="
64
+ - - "~>"
78
65
  - !ruby/object:Gem::Version
79
- version: !binary |-
80
- MA==
81
- none: false
66
+ version: 1.6.1
82
67
  prerelease: false
83
68
  type: :runtime
84
69
  - !ruby/object:Gem::Dependency
85
70
  name: test-unit
86
71
  version_requirements: !ruby/object:Gem::Requirement
87
72
  requirements:
88
- - - ">="
73
+ - - "~>"
89
74
  - !ruby/object:Gem::Version
90
- version: !binary |-
91
- MA==
92
- none: false
75
+ version: 2.0.0
93
76
  requirement: !ruby/object:Gem::Requirement
94
77
  requirements:
95
- - - ">="
78
+ - - "~>"
96
79
  - !ruby/object:Gem::Version
97
- version: !binary |-
98
- MA==
99
- none: false
80
+ version: 2.0.0
100
81
  prerelease: false
101
82
  type: :development
102
- description: MediaWiki API and parser
83
+ description: MediaWiki API and Page content parser for Headlines, Blocks, Elements, ListItems, and Links.
103
84
  email:
104
85
  - dennis@blommesteijn.com
105
86
  executables: []
@@ -123,11 +104,13 @@ files:
123
104
  - test/test_helper.rb
124
105
  - test/unit/files/Wiktionary_Welcome,_newcomers.html
125
106
  - test/unit/wiki_connect.rb
107
+ - test/unit/wiki_page_config.rb
126
108
  - test/unit/wiki_page_object.rb
127
109
  - wiki-api.gemspec
128
- homepage: ''
110
+ homepage: https://github.com/dblommesteijn/wiki-api
129
111
  licenses:
130
112
  - MIT
113
+ metadata: {}
131
114
  post_install_message:
132
115
  rdoc_options: []
133
116
  require_paths:
@@ -136,30 +119,21 @@ required_ruby_version: !ruby/object:Gem::Requirement
136
119
  requirements:
137
120
  - - ">="
138
121
  - !ruby/object:Gem::Version
139
- segments:
140
- - 0
141
- hash: 2
142
- version: !binary |-
143
- MA==
144
- none: false
122
+ version: '0'
145
123
  required_rubygems_version: !ruby/object:Gem::Requirement
146
124
  requirements:
147
125
  - - ">="
148
126
  - !ruby/object:Gem::Version
149
- segments:
150
- - 0
151
- hash: 2
152
- version: !binary |-
153
- MA==
154
- none: false
127
+ version: '0'
155
128
  requirements: []
156
129
  rubyforge_project:
157
- rubygems_version: 1.8.24
130
+ rubygems_version: 2.0.3
158
131
  signing_key:
159
- specification_version: 3
160
- summary: MediaWiki API and parser
132
+ specification_version: 4
133
+ summary: MediaWiki API and Page content parser for Headlines, Blocks, Elements, ListItems, and Links.
161
134
  test_files:
162
135
  - test/test_helper.rb
163
136
  - test/unit/files/Wiktionary_Welcome,_newcomers.html
164
137
  - test/unit/wiki_connect.rb
138
+ - test/unit/wiki_page_config.rb
165
139
  - test/unit/wiki_page_object.rb