wiki-api 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/README.md +64 -33
- data/lib/wiki/api/connect.rb +21 -7
- data/lib/wiki/api/page.rb +23 -50
- data/lib/wiki/api/page_block.rb +6 -4
- data/lib/wiki/api/page_headline.rb +97 -2
- data/lib/wiki/api/page_link.rb +9 -4
- data/lib/wiki/api/page_list_item.rb +4 -2
- data/lib/wiki/api/util.rb +12 -1
- data/lib/wiki/api/version.rb +1 -1
- data/test/unit/files/Wiktionary_program.html +4232 -0
- data/test/unit/wiki_page_offline.rb +262 -0
- data/wiki-api.gemspec +2 -2
- metadata +8 -8
- data/test/unit/wiki_page_config.rb +0 -45
- data/test/unit/wiki_page_object.rb +0 -229
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NjQ3MjZkMDdmNTg2YjdhZDRmM2E3MjU4ZjA1Y2IwOGYzODEwZTFkMA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
YWE4Mzc4ZjRlYTBjNGE4MTkyYmE0OGFkOTJkMDViZTI0MjQ5MGFiMw==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
OTNhMTZkNjMwNzJiMzU5YWE0ZDZiNzRlZWU5ZDJjM2Q1NTA5ZWRiN2IzY2Mw
|
10
|
+
MmU1ZDk0ODZhN2U4ODYwNjY0ZjdmY2U5ZTFkMDk4ZDA2MzIyODUzNjE0YzVl
|
11
|
+
OGE2ZmFmOTYyOWY2MWIyNGNlNmU5NjYwOTNkMGNhNjllOWM0YzQ=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YjgzZGEzYzhhOWFmNzZhMjRlMWFiYmJiY2Q3N2EwOGQwZTBjY2Q0NzYxNWE2
|
14
|
+
ODc5NmMyNmYyODMyNmVmMjFmYzhhOTAzMTUzZTBmODU2OTMwY2RhYjg0Mjkz
|
15
|
+
Yjk3NjMzNGFlZGViYzQyOGQ5YzVjM2MzMjIyNWVlOWRhOTU0MDk=
|
data/README.md
CHANGED
@@ -1,13 +1,17 @@
|
|
1
1
|
# Wiki::Api
|
2
2
|
|
3
|
-
Wiki API is a gem (Ruby on Rails) that interfaces with the MediaWiki API (https://www.mediawiki.org/wiki/API:Main_page). This gem is more than a interface, it has abstract classes
|
3
|
+
Wiki API is a gem (Ruby on Rails) that interfaces with the MediaWiki API (https://www.mediawiki.org/wiki/API:Main_page). This gem is more than an interface: it has abstract classes for Page and Headline parsing. You're able to iterate through these headlines and access data accordingly.
|
4
4
|
|
5
|
-
NOTE: nokogiri
|
5
|
+
NOTE: This gem has a nokogiri (http://nokogiri.org/Nokogiri.html) backend (for HTML parsing). Major components: Page, Headline, Block, ListItem, and Link are wrappers for easy data access; however, it's still possible to retrieve the raw HTML within these objects.
|
6
6
|
|
7
7
|
Requests to the MediaWiki API use the following URI structure:
|
8
8
|
|
9
9
|
http(s)://somemediawiki.org/w/api.php?action=parse&format=json&page="anypage"
|
10
10
|
|
11
|
+
# RDoc (rdoc.info)
|
12
|
+
|
13
|
+
http://rdoc.info/github/dblommesteijn/wiki-api/frames/file/README.md
|
14
|
+
|
11
15
|
|
12
16
|
### Dependencies (production)
|
13
17
|
|
@@ -15,27 +19,27 @@ Requests to the MediaWiki API use the following URI structure:
|
|
15
19
|
* nokogiri
|
16
20
|
|
17
21
|
|
18
|
-
### Roadmap
|
22
|
+
### Feature Roadmap
|
19
23
|
|
20
|
-
* Version (0.0
|
24
|
+
* Version (0.1.0)
|
21
25
|
|
22
|
-
|
26
|
+
Major current release with several core changes.
|
23
27
|
|
24
|
-
|
28
|
+
* Version (0.1.1)
|
29
|
+
|
30
|
+
No features determined yet (please drop me a line if you're interested in additions).
|
25
31
|
|
26
32
|
|
27
33
|
### Changelog
|
28
34
|
|
29
|
-
* Version (0.0.
|
30
|
-
|
31
|
-
Nested ListItems, Links (within Page)
|
35
|
+
* Version (0.0.2) -> (current)
|
32
36
|
|
33
|
-
|
37
|
+
PageLink URI without global config Exception resolved
|
34
38
|
|
39
|
+
Reverse (parent) object lookup
|
35
40
|
|
36
|
-
|
41
|
+
Nested PageHeadline objects
|
37
42
|
|
38
|
-
None discovered thus far.
|
39
43
|
|
40
44
|
|
41
45
|
## Installation
|
@@ -71,13 +75,16 @@ Wiki::Api::Connect.config = CONFIG
|
|
71
75
|
|
72
76
|
## Usage
|
73
77
|
|
74
|
-
### Query a Page
|
78
|
+
### Query a Page and Headline
|
75
79
|
|
76
80
|
Requesting headlines from a given page.
|
77
81
|
|
78
82
|
```ruby
|
79
83
|
page = Wiki::Api::Page.new name: "Wiktionary:Welcome,_newcomers"
|
80
|
-
|
84
|
+
# the root headline equals the pagename
|
85
|
+
puts page.root_headline.name
|
86
|
+
# iterate next level of headlines
|
87
|
+
page.root_headline.headlines.each do |headline_name, headline|
|
81
88
|
# printing headline name (PageHeadline)
|
82
89
|
puts headline.name
|
83
90
|
end
|
@@ -87,29 +94,28 @@ Getting headlines for a given name.
|
|
87
94
|
|
88
95
|
```ruby
|
89
96
|
page = Wiki::Api::Page.new name: "Wiktionary:Welcome,_newcomers"
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
97
|
+
# lookup headline by name (underscore and case are ignored)
|
98
|
+
headline = page.root_headline.headline("editing wiktionary").first
|
99
|
+
# printing headline name (PageHeadline)
|
100
|
+
puts headline.name
|
101
|
+
# get the type of nested headline (html h1,2,3,4 etc.)
|
102
|
+
puts headline.type
|
94
103
|
```
|
95
104
|
|
96
105
|
### Basic Page structure
|
97
106
|
|
98
107
|
```ruby
|
99
108
|
page = Wiki::Api::Page.new name: "Wiktionary:Welcome,_newcomers"
|
100
|
-
|
101
109
|
# iterate PageHeadline objects
|
102
|
-
page.headlines.each do |headline|
|
103
|
-
|
110
|
+
page.root_headline.headlines.each do |headline_name, headline|
|
104
111
|
# exposing nokogiri internal elements
|
105
112
|
elements = headline.elements.flatten
|
106
113
|
elements.each do |element|
|
107
|
-
#
|
114
|
+
# print will result in: Nokogiri::XML::Text or Nokogiri::XML::Element
|
115
|
+
puts element.class
|
108
116
|
end
|
109
|
-
|
110
117
|
# string representation of all nested text
|
111
118
|
block.to_texts
|
112
|
-
|
113
119
|
# iterate PageListItem objects
|
114
120
|
block.list_items.each do |list_item|
|
115
121
|
# string representation of nested text
|
@@ -136,7 +142,7 @@ end
|
|
136
142
|
```
|
137
143
|
|
138
144
|
|
139
|
-
### Example using Global config (https://en.wikipedia.org/wiki/
|
145
|
+
### Example using Global config (https://en.wikipedia.org/wiki/Ruby_on_Rails)
|
140
146
|
|
141
147
|
This is an example of querying wikipedia.org on the page: "Ruby_on_Rails", and printing the References headline links for each list item.
|
142
148
|
|
@@ -146,35 +152,32 @@ CONFIG = { uri: "https://en.wikipedia.org" }
|
|
146
152
|
Wiki::Api::Connect.config = CONFIG
|
147
153
|
|
148
154
|
# querying the page
|
149
|
-
page = Wiki::Api::Page.new name: "
|
155
|
+
page = Wiki::Api::Page.new name: "Ruby_on_Rails"
|
150
156
|
|
151
157
|
# get headlines with name Reference (there can be multiple headlines with the same name!)
|
152
|
-
headlines = page.headline "References"
|
158
|
+
headlines = page.root_headline.headline "References"
|
153
159
|
|
154
160
|
# iterate headlines
|
155
161
|
headlines.each do |headline|
|
156
162
|
# iterate list items on the given headline
|
157
163
|
headline.block.list_items.each do |list_item|
|
158
|
-
|
159
164
|
# print the uri of all links
|
160
165
|
puts list_item.links.map{ |l| l.uri }
|
161
|
-
|
162
166
|
end
|
163
167
|
end
|
164
168
|
```
|
165
169
|
|
166
170
|
|
167
|
-
|
168
|
-
### Example passing URI (https://en.wikipedia.org/wiki/Ruby_on_rails)
|
171
|
+
### Example passing URI (https://en.wikipedia.org/wiki/Ruby_on_Rails)
|
169
172
|
|
170
173
|
This is the same example as the one above, except that the URI is passed directly to the page instead of being set through a global config.
|
171
174
|
|
172
175
|
```ruby
|
173
176
|
# querying the page
|
174
|
-
page = Wiki::Api::Page.new name: "
|
177
|
+
page = Wiki::Api::Page.new name: "Ruby_on_Rails", uri: "https://en.wikipedia.org"
|
175
178
|
|
176
179
|
# get headlines with name Reference (there can be multiple headlines with the same name!)
|
177
|
-
headlines = page.headline "References"
|
180
|
+
headlines = page.root_headline.headline "References"
|
178
181
|
|
179
182
|
# iterate headlines
|
180
183
|
headlines.each do |headline|
|
@@ -189,4 +192,32 @@ end
|
|
189
192
|
```
|
190
193
|
|
191
194
|
|
195
|
+
### Example searching headlines
|
196
|
+
|
197
|
+
This example shows how the headlines can be searched. For more info check: https://github.com/dblommesteijn/wiki-api/blob/master/lib/wiki/api/page.rb#L97
|
198
|
+
|
199
|
+
|
200
|
+
```ruby
|
201
|
+
# querying the page
|
202
|
+
page = Wiki::Api::Page.new name: "Ruby_on_Rails", uri: "https://en.wikipedia.org"
|
203
|
+
|
204
|
+
# NOTE: the following are all valid headline names:
|
205
|
+
# request headline (by literal name)
|
206
|
+
headlines = page.root_headline.headline "Philosophy_and_design"
|
207
|
+
puts headlines.map{|h| h.name}
|
208
|
+
# request headline (by downcase name)
|
209
|
+
headlines = page.root_headline.headline "philosophy_and_design"
|
210
|
+
puts headlines.map{|h| h.name}
|
211
|
+
# request headline (by human name)
|
212
|
+
headlines = page.root_headline.headline "philosophy and design"
|
213
|
+
puts headlines.map{|h| h.name}
|
214
|
+
|
215
|
+
# NOTE2: headlines are matched on headline.start_with?(requested_headline)
|
216
|
+
# because of start_with? compare this should work as well!
|
217
|
+
headlines = page.root_headline.headline "philosophy"
|
218
|
+
puts headlines.map{|h| h.name}
|
219
|
+
```
|
220
|
+
|
221
|
+
|
222
|
+
|
192
223
|
|
data/lib/wiki/api/connect.rb
CHANGED
@@ -7,12 +7,13 @@ module Wiki
|
|
7
7
|
|
8
8
|
class Connect
|
9
9
|
|
10
|
-
attr_accessor :uri, :api_path, :api_options, :http, :request, :response, :html, :parsed
|
10
|
+
attr_accessor :uri, :api_path, :api_options, :http, :request, :response, :html, :parsed, :file
|
11
11
|
|
12
12
|
def initialize(options={})
|
13
13
|
@@config ||= nil
|
14
14
|
options.merge! @@config unless @@config.nil?
|
15
15
|
self.uri = options[:uri] if options.include? :uri
|
16
|
+
self.file = options[:file] if options.include? :file
|
16
17
|
self.api_path = options[:api_path] if options.include? :api_path
|
17
18
|
self.api_options = options[:api_options] if options.include? :api_options
|
18
19
|
|
@@ -38,12 +39,25 @@ module Wiki
|
|
38
39
|
|
39
40
|
def page page_name
|
40
41
|
self.api_options[:page] = page_name
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
42
|
+
# parse page by uri
|
43
|
+
if !self.uri.nil? && self.file.nil?
|
44
|
+
self.connect
|
45
|
+
response = self.response
|
46
|
+
json = JSON.parse response.body, {symbolize_names: true}
|
47
|
+
raise json[:error][:code] unless valid? json, response
|
48
|
+
self.html = json[:parse][:text]
|
49
|
+
self.parsed = Nokogiri::HTML self.html[:*]
|
50
|
+
# parse page by file
|
51
|
+
elsif !self.file.nil?
|
52
|
+
f = File.open(self.file)
|
53
|
+
# self.parsed = Nokogiri::HTML self.html[:*]
|
54
|
+
self.parsed = Nokogiri::HTML(f)
|
55
|
+
f.close
|
56
|
+
# invalid config, raise exception
|
57
|
+
else
|
58
|
+
raise "no :uri or :file config found!"
|
59
|
+
end
|
60
|
+
self.parsed
|
47
61
|
end
|
48
62
|
|
49
63
|
class << self
|
data/lib/wiki/api/page.rb
CHANGED
@@ -1,49 +1,34 @@
|
|
1
1
|
module Wiki
|
2
2
|
module Api
|
3
3
|
|
4
|
+
# MediaWiki Page, collection of all html information plus its page title
|
4
5
|
class Page
|
5
6
|
|
6
|
-
attr_accessor :name, :parsed_page, :uri
|
7
|
+
attr_accessor :name, :parsed_page, :uri, :parent
|
7
8
|
|
8
9
|
def initialize(options={})
|
9
10
|
self.name = options[:name] if options.include? :name
|
10
|
-
uri = options[:uri] if options.include? :uri
|
11
|
-
|
12
|
-
@@config ||= nil
|
13
|
-
if @@config.nil? || !uri.nil?
|
14
|
-
# use the connection to collect HTML pages for parsing
|
15
|
-
@connect = Wiki::Api::Connect.new uri: uri
|
16
|
-
else
|
17
|
-
# using a local HTML file for parsing
|
18
|
-
end
|
11
|
+
self.uri = options[:uri] if options.include? :uri
|
12
|
+
@connect = Wiki::Api::Connect.new uri: uri
|
19
13
|
end
|
20
14
|
|
21
|
-
def
|
22
|
-
|
23
|
-
self.parse_blocks.each do |headline_name, elements|
|
24
|
-
headline = PageHeadline.new name: headline_name
|
25
|
-
elements.each do |element|
|
26
|
-
# nokogiri element
|
27
|
-
headline.block << element
|
28
|
-
end
|
29
|
-
headlines << headline
|
30
|
-
end
|
31
|
-
headlines
|
15
|
+
def connect
|
16
|
+
@connect
|
32
17
|
end
|
33
18
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
elements.each do |element|
|
39
|
-
# nokogiri element
|
40
|
-
headline.block << element
|
41
|
-
end
|
42
|
-
headlines << headline
|
43
|
-
end
|
44
|
-
headlines
|
19
|
+
|
20
|
+
# collect all headlines, keep original page formatting
|
21
|
+
def root_headline
|
22
|
+
self.parse_blocks
|
45
23
|
end
|
46
24
|
|
25
|
+
# # collect headlines by given name, this will flatten the nested headlines
|
26
|
+
# def flat_headlines_by_name headline_name
|
27
|
+
# raise "not yet implemented!"
|
28
|
+
# # TODO: implement flattening of headlines within the root headline
|
29
|
+
# # ALT: breadth-first search option in the root of the first headline
|
30
|
+
# self.parse_blocks(headline_name)
|
31
|
+
# end
|
47
32
|
|
48
33
|
|
49
34
|
def to_html
|
@@ -55,22 +40,8 @@ module Wiki
|
|
55
40
|
self.parse_page = nil
|
56
41
|
end
|
57
42
|
|
58
|
-
class << self
|
59
|
-
def config=(config = {})
|
60
|
-
@@config = config
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
protected
|
65
|
-
|
66
43
|
def load_page!
|
67
|
-
|
68
|
-
self.parsed_page ||= @connect.page self.name
|
69
|
-
elsif self.parsed_page.nil?
|
70
|
-
f = File.open(@@config[:file])
|
71
|
-
self.parsed_page = Nokogiri::HTML(f)
|
72
|
-
f.close
|
73
|
-
end
|
44
|
+
self.parsed_page ||= @connect.page self.name
|
74
45
|
end
|
75
46
|
|
76
47
|
|
@@ -81,11 +52,12 @@ module Wiki
|
|
81
52
|
|
82
53
|
# get headline nodes by span class
|
83
54
|
xs = self.parsed_page.xpath("//span[@class='mw-headline']")
|
55
|
+
|
84
56
|
# filter single headline by name (ignore case)
|
85
57
|
xs = self.filter_headline xs, headline_name unless headline_name.nil?
|
86
58
|
|
87
59
|
# NOTE: first_part has no id attribute and thus cannot be filtered or processed within xpath (xs)
|
88
|
-
if headline_name
|
60
|
+
if headline_name.nil? || headline_name.start_with?(self.name.downcase)
|
89
61
|
x = self.first_part
|
90
62
|
result[self.name] ||= []
|
91
63
|
result[self.name] << (self.collect_elements(x.parent))
|
@@ -95,11 +67,12 @@ module Wiki
|
|
95
67
|
xs.each do |x|
|
96
68
|
headline = x.attributes["id"].value
|
97
69
|
elements = self.collect_elements x.parent.next
|
98
|
-
result[headline] ||= []
|
70
|
+
result[headline] ||= []
|
99
71
|
result[headline] << elements
|
100
72
|
end
|
101
73
|
|
102
|
-
|
74
|
+
# create root object
|
75
|
+
PageHeadline.new parent: self, name: result.first[0], headlines: result, level: 0
|
103
76
|
end
|
104
77
|
|
105
78
|
# harvest first part of the page (missing heading and class="mw-headline")
|
data/lib/wiki/api/page_block.rb
CHANGED
@@ -1,20 +1,22 @@
|
|
1
1
|
module Wiki
|
2
2
|
module Api
|
3
3
|
|
4
|
+
# Collection of elements segmented per headline
|
4
5
|
class PageBlock
|
5
6
|
|
6
|
-
attr_accessor :elements
|
7
|
+
attr_accessor :elements, :parent
|
7
8
|
|
8
9
|
def initialize options={}
|
10
|
+
self.parent = options[:parent] if options.include? :parent
|
9
11
|
self.elements = []
|
10
12
|
end
|
11
13
|
|
12
14
|
def << value
|
15
|
+
# value.first.previous.name
|
13
16
|
self.elements << value
|
14
17
|
end
|
15
18
|
|
16
19
|
def to_texts
|
17
|
-
# TODO: perhaps we should wrap the elements with objects??
|
18
20
|
texts = []
|
19
21
|
self.elements.flatten.each do |element|
|
20
22
|
text = Wiki::Api::Util.element_to_text element if element.is_a? Nokogiri::XML::Element
|
@@ -28,14 +30,14 @@ module Wiki
|
|
28
30
|
def list_items
|
29
31
|
# TODO: perhaps we should wrap the elements with objects, and request a li per element??
|
30
32
|
self.search("li").map do |list_item|
|
31
|
-
PageListItem.new element: list_item
|
33
|
+
PageListItem.new parent: self, element: list_item
|
32
34
|
end
|
33
35
|
end
|
34
36
|
|
35
37
|
def links
|
36
38
|
# TODO: perhaps we should wrap the elements with objects, and request a li per element??
|
37
39
|
self.search("a").map do |a|
|
38
|
-
PageLink.new element: a
|
40
|
+
PageLink.new parent: self, element: a
|
39
41
|
end
|
40
42
|
end
|
41
43
|
|
@@ -1,20 +1,115 @@
|
|
1
1
|
module Wiki
|
2
2
|
module Api
|
3
3
|
|
4
|
+
# Headline for a page (class="mw-headline")
|
4
5
|
class PageHeadline
|
5
6
|
|
6
|
-
|
7
|
+
require 'json'
|
8
|
+
|
9
|
+
LEVEL = ["text", "h1", "h2", "h3", "h4", "h5", "h6"]
|
10
|
+
|
11
|
+
attr_accessor :name, :block, :parent, :headlines, :level
|
7
12
|
|
8
13
|
def initialize options={}
|
9
14
|
self.name = options[:name] if options.include? :name
|
10
|
-
self.
|
15
|
+
self.parent = options[:parent] if options.include? :parent
|
16
|
+
self.level = options[:level] if options.include? :level
|
17
|
+
options[:headlines] ||= []
|
18
|
+
self.headlines ||= {}
|
19
|
+
|
20
|
+
# store elements in a block
|
21
|
+
self.block = PageBlock.new parent: self
|
22
|
+
if options[:headlines].include? self.name
|
23
|
+
options[:headlines][self.name].each do |element|
|
24
|
+
self.block << element
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# collect nested headlines
|
29
|
+
headlines = options[:headlines]
|
30
|
+
# remove self from list
|
31
|
+
headlines.delete self.name
|
32
|
+
nested_headlines = self.nested_headlines headlines, self.name, self.level
|
33
|
+
|
34
|
+
# iterate nested headlines, and recurse into each of them
|
35
|
+
nested_headlines.each do |headline_name, value|
|
36
|
+
level = LEVEL.index value.first.first.previous.name
|
37
|
+
self.headlines[headline_name] = (PageHeadline.new parent: self, name: headline_name, headlines: headlines, level: level)
|
38
|
+
end
|
11
39
|
end
|
12
40
|
|
13
41
|
def elements
|
14
42
|
self.block.elements
|
15
43
|
end
|
16
44
|
|
45
|
+
def type
|
46
|
+
self.block.elements.first.first.previous.name
|
47
|
+
end
|
48
|
+
|
49
|
+
# get headline by name
|
50
|
+
def headline name
|
51
|
+
name = name.downcase.gsub(" ", "_")
|
52
|
+
self.headlines.reject do |k,v|
|
53
|
+
!k.downcase.start_with?(name)
|
54
|
+
end.values()
|
55
|
+
end
|
56
|
+
|
57
|
+
# recursive headline search
|
58
|
+
# def headline_by_name name, depth = 1
|
59
|
+
# name = name.downcase.gsub(" ", "_")
|
60
|
+
# ret = []
|
61
|
+
# self.headlines.each do |k,v|
|
62
|
+
# ret << v if k.downcase.start_with?(name)
|
63
|
+
# next if v.headlines.empty?
|
64
|
+
# if depth > 0
|
65
|
+
# q = v.headline_by_name name, (depth - 1)
|
66
|
+
# ret.concat q
|
67
|
+
# end
|
68
|
+
# end
|
69
|
+
# ret
|
70
|
+
# end
|
17
71
|
|
72
|
+
# headline exists for current headline
|
73
|
+
def has_headline? name
|
74
|
+
name = name.downcase.gsub(" ", "_")
|
75
|
+
self.headlines.each do |k,v|
|
76
|
+
return true if k.downcase.start_with?(name)
|
77
|
+
end
|
78
|
+
false
|
79
|
+
end
|
80
|
+
|
81
|
+
def to_hash
|
82
|
+
ret = {name: self.name, headlines: [], type: self.type}
|
83
|
+
self.headlines.each do |headline_name, headline|
|
84
|
+
ret[:headlines] << headline.to_hash
|
85
|
+
end
|
86
|
+
ret
|
87
|
+
end
|
88
|
+
|
89
|
+
def to_pretty_json
|
90
|
+
JSON.pretty_generate self.to_hash
|
91
|
+
end
|
92
|
+
|
93
|
+
protected
|
94
|
+
|
95
|
+
# filter nested headlines (elements) from a parent headline (by name)
|
96
|
+
def nested_headlines headlines, name, original_level
|
97
|
+
ret = {}
|
98
|
+
init_level = nil
|
99
|
+
# iterate headlines, skip already done ones
|
100
|
+
#headlines.drop(headline_index + 1).each do |headline|
|
101
|
+
headlines.to_a.each do |name, value|
|
102
|
+
level = LEVEL.index value.first.first.previous.name
|
103
|
+
init_level ||= level
|
104
|
+
# a lower level indicates the end of the nesting
|
105
|
+
break if level <= original_level
|
106
|
+
break if level < init_level
|
107
|
+
# higher level indicates nested items, these will be processed recursively
|
108
|
+
next if init_level != level
|
109
|
+
ret[name] = value
|
110
|
+
end
|
111
|
+
ret
|
112
|
+
end
|
18
113
|
|
19
114
|
end
|
20
115
|
|