wiki-api 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: !binary |-
4
+ NjgxOGUxZjQ2MWQ2MjNhMDA2ZGUwMTRhOGI4MWFlOGQ3MzI4MWFjOA==
5
+ data.tar.gz: !binary |-
6
+ ZmZkNDFhMzc0ZTNmZDBlYTFmMTIwMmU5ZDgzYTQ2YjM0ZTk1ZmQzYg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ NGM4YTU2MjQ3Njk1MzJkMDhlYjcxODYxNDFkNzRlODI5MjMwNmU5ZGEzZmJj
10
+ MjhjZjYxYzcxMmYzYjA0YzA3NzdlYTJhMjM0ZTllNzgyMDk0MGJiNjBiZWRl
11
+ N2Y5YzMwZWZjZmY3NWQ0YmJiMjdiOTkwOTU1ZmE4MDg5Njk4M2Y=
12
+ data.tar.gz: !binary |-
13
+ MGZlMTYzZTgzZWE3YmYzZmIyMjc0OTZhMGY0NDEwYzJmNmFiMTZkNDM3OGM2
14
+ Mjc1MDdjMzQ3MjM1NmVlODM3Mzg5ZTViMGRmOGI2NzE1NDZjODJhZTA2MjI5
15
+ NWE3YmI4MDYxY2I4NGM3MGUwNzAzNjQ3YjMwODU5NDBlMWYxZDM=
data/README.md CHANGED
@@ -17,17 +17,21 @@ Requests to the MediaWiki API use the following URI structure:
17
17
 
18
18
  ### Roadmap
19
19
 
20
- * Version (0.0.1) (current)
21
-
22
- Initial project.
23
-
24
- * Version (0.0.2)
20
+ * Version (0.0.2) (current)
25
21
 
26
22
  Index important words per block, page, list item;
27
23
 
28
24
  Parse objects for more elements within a Page.
29
25
 
30
26
 
27
+ ### Changelog
28
+
29
+ * Version (0.0.1) -> (0.0.2)
30
+
31
+ Nested ListItems, Links (within Page)
32
+
33
+ Search on Page headline (ignores case; spaces are treated as underscores)
34
+
31
35
 
32
36
  ### Known Issues
33
37
 
@@ -132,7 +136,7 @@ end
132
136
  ```
133
137
 
134
138
 
135
- ### Example (https://en.wikipedia.org/wiki/Ruby_on_rails)
139
+ ### Example using Global config (https://en.wikipedia.org/wiki/Ruby_on_rails)
136
140
 
137
141
  This is an example of querying wikipedia.org for the page "Ruby_on_rails" and printing the links of each list item under the References headline.
138
142
 
@@ -161,6 +165,28 @@ end
161
165
 
162
166
 
163
167
 
168
+ ### Example passing URI (https://en.wikipedia.org/wiki/Ruby_on_rails)
169
+
170
+ This is the same example as the one above, except that the URI is passed directly to the Page instead of being set through a global config.
171
+
172
+ ```ruby
173
+ # querying the page
174
+ page = Wiki::Api::Page.new name: "Ruby_on_rails", uri: "https://en.wikipedia.org"
175
+
176
+ # get headlines with the name References (there can be multiple headlines with the same name!)
177
+ headlines = page.headline "References"
178
+
179
+ # iterate headlines
180
+ headlines.each do |headline|
181
+ # iterate list items on the given headline
182
+ headline.block.list_items.each do |list_item|
183
+
184
+ # print the uri of all links
185
+ puts list_item.links.map{ |l| l.uri }
186
+
187
+ end
188
+ end
189
+ ```
164
190
 
165
191
 
166
192
 
@@ -10,6 +10,7 @@ module Wiki
10
10
  attr_accessor :uri, :api_path, :api_options, :http, :request, :response, :html, :parsed
11
11
 
12
12
  def initialize(options={})
13
+ @@config ||= nil
13
14
  options.merge! @@config unless @@config.nil?
14
15
  self.uri = options[:uri] if options.include? :uri
15
16
  self.api_path = options[:api_path] if options.include? :api_path
data/lib/wiki/api/page.rb CHANGED
@@ -3,14 +3,16 @@ module Wiki
3
3
 
4
4
  class Page
5
5
 
6
- attr_accessor :name, :parsed_page
6
+ attr_accessor :name, :parsed_page, :uri
7
7
 
8
8
  def initialize(options={})
9
9
  self.name = options[:name] if options.include? :name
10
+ uri = options[:uri] if options.include? :uri
11
+
10
12
  @@config ||= nil
11
- if @@config.nil?
13
+ if @@config.nil? || !uri.nil?
12
14
  # use the connection to collect HTML pages for parsing
13
- @connect = Wiki::Api::Connect.new
15
+ @connect = Wiki::Api::Connect.new uri: uri
14
16
  else
15
17
  # using a local HTML file for parsing
16
18
  end
@@ -79,8 +81,8 @@ module Wiki
79
81
 
80
82
  # get headline nodes by span class
81
83
  xs = self.parsed_page.xpath("//span[@class='mw-headline']")
82
- # filter single headline by name
83
- xs = xs.reject{|t| t.attributes["id"].value != headline_name } unless headline_name.nil?
84
+ # filter single headline by name (ignore case)
85
+ xs = self.filter_headline xs, headline_name unless headline_name.nil?
84
86
 
85
87
  # NOTE: first_part has no id attribute and thus cannot be filtered or processed within xpath (xs)
86
88
  if headline_name == self.name || headline_name.nil?
@@ -119,6 +121,14 @@ module Wiki
119
121
  elements
120
122
  end
121
123
 
124
+ def filter_headline xs, headline_name
125
+ # transform name to a wiki_id (downcase and space replace with underscore)
126
+ headline_name = headline_name.downcase.gsub(" ", "_")
127
+ # reject not matching id's
128
+ xs.reject do |t|
129
+ !t.attributes["id"].value.downcase.start_with?(headline_name)
130
+ end
131
+ end
122
132
 
123
133
  end
124
134
 
data/lib/wiki/api/util.rb CHANGED
@@ -22,6 +22,7 @@ module Wiki
22
22
  result.map{|k,v| v.join("")}
23
23
  end
24
24
 
25
+
25
26
  protected
26
27
  def clean_text text
27
28
  text.gsub(/\n/, " ").squeeze(" ").gsub(/\s(\W)/, '\1').gsub(/(\W)\s/, '\1 ').strip
@@ -1,5 +1,5 @@
1
1
  module Wiki
2
2
  module Api
3
- VERSION = "0.0.1"
3
+ VERSION = "0.0.2"
4
4
  end
5
5
  end
@@ -0,0 +1,45 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'test/unit'
5
+ require File.expand_path(File.dirname(__FILE__) + "/../../lib/wiki/api")
6
+
7
+ #
8
+ # Testing the parsing of URI by passing a uri variable to Page:
9
+ # https://en.wiktionary.org/wiki/Wiktionary:Welcome,_newcomers
10
+ #
11
+
12
+ class WikiPageConfig < Test::Unit::TestCase
13
+
14
+ def setup
15
+ # NOTE: comment Page.config, to use the online MediaWiki instance
16
+ # Wiki::Api::Page.config = PAGE_CONFIG
17
+ # Wiki::Api::Connect.config = GLB_CONFIG
18
+ # @page_name = "Wiktionary:Welcome,_newcomers"
19
+ end
20
+
21
+ def teardown
22
+ end
23
+
24
+ # test simple page invocation
25
+ def test_page_invocation_with_uri
26
+ page = Wiki::Api::Page.new name: "Wiktionary:Welcome,_newcomers", uri: "http://en.wiktionary.org"
27
+ headlines = page.headlines
28
+ assert !headlines.empty?, "expected headlines"
29
+ assert headlines.size < 1, "expected more than one headline"
30
+ headlines.each do |headline|
31
+ assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
32
+ end
33
+ end
34
+
35
+ def test_wrong_page_invocation_with_uri
36
+ begin
37
+ page = Wiki::Api::Page.new name: "A_Wrong_Page_Name", uri: "http://en.wiktionary.org"
38
+ assert false, "expected a failiure"
39
+ rescue Exception => e
40
+ assert true, "expected a failiure"
41
+ end
42
+ end
43
+
44
+ end
45
+
@@ -45,6 +45,7 @@ class WikiPageObject < Test::Unit::TestCase
45
45
  page = Wiki::Api::Page.new name: @page_name
46
46
  headlines = page.headlines
47
47
  assert !headlines.empty?, "expected headlines"
48
+ assert headlines.size > 1, "expected more than one headline"
48
49
  headlines.each do |headline|
49
50
  assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
50
51
  elements = headline.elements.flatten
@@ -62,6 +63,7 @@ class WikiPageObject < Test::Unit::TestCase
62
63
  page = Wiki::Api::Page.new name: @page_name
63
64
  headlines = page.headlines
64
65
  assert !headlines.empty?, "expected headlines"
66
+ assert headlines.size > 1, "expected more than one headline"
65
67
  headlines.each do |headline|
66
68
  assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
67
69
  block = headline.block
@@ -74,6 +76,7 @@ class WikiPageObject < Test::Unit::TestCase
74
76
  page = Wiki::Api::Page.new name: @page_name
75
77
  headlines = page.headlines
76
78
  assert !headlines.empty?, "expected headlines"
79
+ assert headlines.size > 1, "expected more than one headline"
77
80
  headlines.each do |headline|
78
81
  assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
79
82
  block = headline.block
@@ -91,6 +94,7 @@ class WikiPageObject < Test::Unit::TestCase
91
94
  page = Wiki::Api::Page.new name: @page_name
92
95
  headlines = page.headlines
93
96
  assert !headlines.empty?, "expected headlines"
97
+ assert headlines.size > 1, "expected more than one headline"
94
98
  headlines.each do |headline|
95
99
  assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
96
100
  block = headline.block
@@ -108,6 +112,7 @@ class WikiPageObject < Test::Unit::TestCase
108
112
  page = Wiki::Api::Page.new name: @page_name
109
113
  headlines = page.headlines
110
114
  assert !headlines.empty?, "expected headlines"
115
+ assert headlines.size > 1, "expected more than one headline"
111
116
  headlines.each do |headline|
112
117
  assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
113
118
  block = headline.block
@@ -126,6 +131,7 @@ class WikiPageObject < Test::Unit::TestCase
126
131
  page = Wiki::Api::Page.new name: @page_name
127
132
  headlines = page.headlines
128
133
  assert !headlines.empty?, "expected headlines"
134
+ assert headlines.size > 1, "expected more than one headline"
129
135
  headlines.each do |headline|
130
136
  assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
131
137
  block = headline.block
@@ -148,6 +154,7 @@ class WikiPageObject < Test::Unit::TestCase
148
154
  page = Wiki::Api::Page.new name: @page_name
149
155
  headlines = page.headlines
150
156
  assert !headlines.empty?, "expected headlines"
157
+ assert headlines.size > 1, "expected more than one headline"
151
158
 
152
159
  # collect headline names
153
160
  hs = []
@@ -162,10 +169,61 @@ class WikiPageObject < Test::Unit::TestCase
162
169
  headlines = page.headline h
163
170
  # test for at least one (many indicates multiple headlines with the same name)
164
171
  assert !headlines.empty?, "expected a list of headlines"
172
+ assert headlines.size == 1, "expected one headline"
165
173
  headlines.each do |headline|
166
174
  assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
167
175
  end
168
176
  end
169
177
  end
170
178
 
179
+ def test_page_headline_search_downcase
180
+ page = Wiki::Api::Page.new name: @page_name
181
+
182
+ headlines = page.headline "Editing_Wiktionary"
183
+ assert !headlines.empty?, "expected headlines"
184
+ assert headlines.size == 1, "expected one headline"
185
+
186
+
187
+ # iterate headlines
188
+ headlines.each do |headline|
189
+ assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
190
+ end
191
+
192
+ # search downcase
193
+ headlines = page.headline "editing_wiktionary"
194
+ assert !headlines.empty?, "expected headlines"
195
+
196
+ # iterate headlines
197
+ headlines.each do |headline|
198
+ assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
199
+ end
200
+
201
+ end
202
+
203
+ def test_page_headline_search_regular
204
+ page = Wiki::Api::Page.new name: @page_name
205
+
206
+ headlines = page.headline "Editing_Wiktionary"
207
+ assert !headlines.empty?, "expected headlines"
208
+ assert headlines.size == 1, "expected one headline"
209
+
210
+ # iterate headlines
211
+ headlines.each do |headline|
212
+ assert headline.is_a?(Wiki::Api::PageHeadline), "expected headline object"
213
+ end
214
+
215
+ # search downcase
216
+ headlines = page.headline "editing_wiktionary"
217
+ assert headlines.size == 1, "expected one headline"
218
+
219
+ # search downcase with spaces
220
+ headlines = page.headline "editing wiktionary"
221
+ assert headlines.size == 1, "expected one headline"
222
+
223
+ # search idiot case with spaces
224
+ headlines = page.headline "eDiTinG wiKtiOnarY"
225
+ assert headlines.size == 1, "expected one headline"
226
+
227
+ end
228
+
171
229
  end
data/wiki-api.gemspec CHANGED
@@ -8,9 +8,9 @@ Gem::Specification.new do |spec|
8
8
  spec.version = Wiki::Api::VERSION
9
9
  spec.authors = ["Dennis Blommesteijn"]
10
10
  spec.email = ["dennis@blommesteijn.com"]
11
- spec.description = %q{MediaWiki API and parser}
12
- spec.summary = %q{MediaWiki API and parser}
13
- spec.homepage = ""
11
+ spec.description = %q{MediaWiki API and Page content parser for Headlines, Blocks, Elements, ListItems, and Links.}
12
+ spec.summary = %q{MediaWiki API and Page content parser for Headlines, Blocks, Elements, ListItems, and Links.}
13
+ spec.homepage = "https://github.com/dblommesteijn/wiki-api"
14
14
  spec.license = "MIT"
15
15
 
16
16
  spec.files = `git ls-files`.split($/)
@@ -22,8 +22,8 @@ Gem::Specification.new do |spec|
22
22
  spec.add_development_dependency "rake"
23
23
 
24
24
  # dependencies
25
- spec.add_dependency 'nokogiri'
26
- spec.add_dependency 'json'
27
- spec.add_development_dependency "test-unit"
25
+ spec.add_dependency 'nokogiri', "~> 1.5.0"
26
+ spec.add_dependency 'json', "~> 1.6.1"
27
+ spec.add_development_dependency "test-unit", "~> 2.0.0"
28
28
 
29
29
  end
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wiki-api
3
3
  version: !ruby/object:Gem::Version
4
- prerelease:
5
- version: 0.0.1
4
+ version: 0.0.2
6
5
  platform: ruby
7
6
  authors:
8
7
  - Dennis Blommesteijn
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-03-28 00:00:00.000000000 Z
11
+ date: 2013-04-03 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: bundler
@@ -18,13 +17,11 @@ dependencies:
18
17
  - - "~>"
19
18
  - !ruby/object:Gem::Version
20
19
  version: '1.3'
21
- none: false
22
20
  requirement: !ruby/object:Gem::Requirement
23
21
  requirements:
24
22
  - - "~>"
25
23
  - !ruby/object:Gem::Version
26
24
  version: '1.3'
27
- none: false
28
25
  prerelease: false
29
26
  type: :development
30
27
  - !ruby/object:Gem::Dependency
@@ -33,73 +30,57 @@ dependencies:
33
30
  requirements:
34
31
  - - ">="
35
32
  - !ruby/object:Gem::Version
36
- version: !binary |-
37
- MA==
38
- none: false
33
+ version: '0'
39
34
  requirement: !ruby/object:Gem::Requirement
40
35
  requirements:
41
36
  - - ">="
42
37
  - !ruby/object:Gem::Version
43
- version: !binary |-
44
- MA==
45
- none: false
38
+ version: '0'
46
39
  prerelease: false
47
40
  type: :development
48
41
  - !ruby/object:Gem::Dependency
49
42
  name: nokogiri
50
43
  version_requirements: !ruby/object:Gem::Requirement
51
44
  requirements:
52
- - - ">="
45
+ - - "~>"
53
46
  - !ruby/object:Gem::Version
54
- version: !binary |-
55
- MA==
56
- none: false
47
+ version: 1.5.0
57
48
  requirement: !ruby/object:Gem::Requirement
58
49
  requirements:
59
- - - ">="
50
+ - - "~>"
60
51
  - !ruby/object:Gem::Version
61
- version: !binary |-
62
- MA==
63
- none: false
52
+ version: 1.5.0
64
53
  prerelease: false
65
54
  type: :runtime
66
55
  - !ruby/object:Gem::Dependency
67
56
  name: json
68
57
  version_requirements: !ruby/object:Gem::Requirement
69
58
  requirements:
70
- - - ">="
59
+ - - "~>"
71
60
  - !ruby/object:Gem::Version
72
- version: !binary |-
73
- MA==
74
- none: false
61
+ version: 1.6.1
75
62
  requirement: !ruby/object:Gem::Requirement
76
63
  requirements:
77
- - - ">="
64
+ - - "~>"
78
65
  - !ruby/object:Gem::Version
79
- version: !binary |-
80
- MA==
81
- none: false
66
+ version: 1.6.1
82
67
  prerelease: false
83
68
  type: :runtime
84
69
  - !ruby/object:Gem::Dependency
85
70
  name: test-unit
86
71
  version_requirements: !ruby/object:Gem::Requirement
87
72
  requirements:
88
- - - ">="
73
+ - - "~>"
89
74
  - !ruby/object:Gem::Version
90
- version: !binary |-
91
- MA==
92
- none: false
75
+ version: 2.0.0
93
76
  requirement: !ruby/object:Gem::Requirement
94
77
  requirements:
95
- - - ">="
78
+ - - "~>"
96
79
  - !ruby/object:Gem::Version
97
- version: !binary |-
98
- MA==
99
- none: false
80
+ version: 2.0.0
100
81
  prerelease: false
101
82
  type: :development
102
- description: MediaWiki API and parser
83
+ description: MediaWiki API and Page content parser for Headlines, Blocks, Elements, ListItems, and Links.
103
84
  email:
104
85
  - dennis@blommesteijn.com
105
86
  executables: []
@@ -123,11 +104,13 @@ files:
123
104
  - test/test_helper.rb
124
105
  - test/unit/files/Wiktionary_Welcome,_newcomers.html
125
106
  - test/unit/wiki_connect.rb
107
+ - test/unit/wiki_page_config.rb
126
108
  - test/unit/wiki_page_object.rb
127
109
  - wiki-api.gemspec
128
- homepage: ''
110
+ homepage: https://github.com/dblommesteijn/wiki-api
129
111
  licenses:
130
112
  - MIT
113
+ metadata: {}
131
114
  post_install_message:
132
115
  rdoc_options: []
133
116
  require_paths:
@@ -136,30 +119,21 @@ required_ruby_version: !ruby/object:Gem::Requirement
136
119
  requirements:
137
120
  - - ">="
138
121
  - !ruby/object:Gem::Version
139
- segments:
140
- - 0
141
- hash: 2
142
- version: !binary |-
143
- MA==
144
- none: false
122
+ version: '0'
145
123
  required_rubygems_version: !ruby/object:Gem::Requirement
146
124
  requirements:
147
125
  - - ">="
148
126
  - !ruby/object:Gem::Version
149
- segments:
150
- - 0
151
- hash: 2
152
- version: !binary |-
153
- MA==
154
- none: false
127
+ version: '0'
155
128
  requirements: []
156
129
  rubyforge_project:
157
- rubygems_version: 1.8.24
130
+ rubygems_version: 2.0.3
158
131
  signing_key:
159
- specification_version: 3
160
- summary: MediaWiki API and parser
132
+ specification_version: 4
133
+ summary: MediaWiki API and Page content parser for Headlines, Blocks, Elements, ListItems, and Links.
161
134
  test_files:
162
135
  - test/test_helper.rb
163
136
  - test/unit/files/Wiktionary_Welcome,_newcomers.html
164
137
  - test/unit/wiki_connect.rb
138
+ - test/unit/wiki_page_config.rb
165
139
  - test/unit/wiki_page_object.rb