wiki-api 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,27 +1,28 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Wiki
2
4
  module Api
3
-
4
5
  # Collection of elements for segmented per headline
5
6
  class PageBlock
6
-
7
7
  attr_accessor :elements, :parent
8
8
 
9
- def initialize options={}
10
- self.parent = options[:parent] if options.include? :parent
9
+ def initialize(options = {})
10
+ self.parent = options[:parent] if options.include?(:parent)
11
11
  self.elements = []
12
12
  end
13
13
 
14
- def << value
14
+ def <<(value)
15
15
  # value.first.previous.name
16
- self.elements << value
16
+ elements << value
17
17
  end
18
18
 
19
19
  def to_texts
20
20
  texts = []
21
- self.elements.flatten.each do |element|
22
- text = Wiki::Api::Util.element_to_text element if element.is_a? Nokogiri::XML::Element
21
+ elements.flatten.each do |element|
22
+ text = Wiki::Api::Util.element_to_text(element) if element.is_a?(Nokogiri::XML::Element)
23
23
  next if text.nil?
24
24
  next if text.empty?
25
+
25
26
  texts << text
26
27
  end
27
28
  texts
@@ -29,27 +30,25 @@ module Wiki
29
30
 
30
31
  def list_items
31
32
  # TODO: perhaps we should wrap the elements with objects, and request a li per element??
32
- self.search("li").map do |list_item|
33
- PageListItem.new parent: self, element: list_item
33
+ search('li').map do |list_item|
34
+ PageListItem.new(parent: self, element: list_item)
34
35
  end
35
36
  end
36
37
 
37
38
  def links
38
39
  # TODO: perhaps we should wrap the elements with objects, and request a li per element??
39
- self.search("a").map do |a|
40
- PageLink.new parent: self, element: a
40
+ search('a').map do |a|
41
+ PageLink.new(parent: self, element: a)
41
42
  end
42
43
  end
43
44
 
44
45
  protected
45
46
 
46
47
  def search *paths
47
- self.elements.flatten.flat_map do |element|
48
+ elements.flatten.flat_map do |element|
48
49
  element.search(*paths)
49
- end.reject{|t| t.nil?}
50
+ end.reject(&:nil?)
50
51
  end
51
-
52
52
  end
53
-
54
53
  end
55
- end
54
+ end
@@ -1,117 +1,118 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Wiki
2
4
  module Api
3
-
4
5
  # Headline for a page (class="mw-healine")
5
6
  class PageHeadline
6
-
7
7
  require 'json'
8
8
 
9
- LEVEL = ["text", "h1", "h2", "h3", "h4", "h5", "h6"]
9
+ LEVEL = %w[text h1 h2 h3 h4 h5 h6].freeze
10
10
 
11
11
  attr_accessor :name, :block, :parent, :headlines, :level
12
12
 
13
- def initialize options={}
14
- self.name = options[:name] if options.include? :name
15
- self.parent = options[:parent] if options.include? :parent
16
- self.level = options[:level] if options.include? :level
13
+ def initialize(options = {})
14
+ self.name = options[:name] if options.include?(:name)
15
+ self.parent = options[:parent] if options.include?(:parent)
16
+ self.level = options[:level] if options.include?(:level)
17
17
  options[:headlines] ||= []
18
18
  self.headlines ||= {}
19
19
 
20
20
  # store elements in a block
21
- self.block = PageBlock.new parent: self
22
- if options[:headlines].include? self.name
23
- options[:headlines][self.name].each do |element|
24
- self.block << element
21
+ self.block = PageBlock.new(parent: self)
22
+ if options[:headlines].include?(name)
23
+ options[:headlines][name].each do |element|
24
+ block << element
25
25
  end
26
26
  end
27
27
 
28
28
  # collect nested headlines
29
29
  headlines = options[:headlines]
30
30
  # remove self from list
31
- headlines.delete self.name
32
- nested_headlines = self.nested_headlines headlines, self.name, self.level
31
+ headlines.delete(name)
32
+ nested_headlines = self.nested_headlines(headlines, name, level)
33
33
 
34
34
  # iterate nested headlines, and call recursive
35
35
  nested_headlines.each do |headline_name, value|
36
- level = LEVEL.index value.first.first.previous.name
37
- self.headlines[headline_name] = (PageHeadline.new parent: self, name: headline_name, headlines: headlines, level: level)
36
+ level = LEVEL.index(value.first.first.previous.name)
37
+ self.headlines[headline_name] =
38
+ PageHeadline.new(parent: self, name: headline_name, headlines:, level:)
38
39
  end
39
40
  end
40
41
 
41
42
  def elements
42
- self.block.elements
43
+ block.elements
43
44
  end
44
45
 
45
46
  def type
46
- self.block.elements.first.first.previous.name
47
+ block.elements.first.first.previous.name
47
48
  end
48
49
 
49
50
  # get headline by name
50
- def headline name
51
- name = name.downcase.gsub(" ", "_")
52
- self.headlines.reject do |k,v|
53
- !k.downcase.start_with?(name)
54
- end.values()
51
+ def headline(name)
52
+ name = name.downcase.gsub(' ', '_')
53
+ self.headlines.select do |k, _v|
54
+ k.downcase.start_with?(name)
55
+ end.values
55
56
  end
56
57
 
57
- # recursive headline search
58
- # def headline_by_name name, depth = 1
59
- # name = name.downcase.gsub(" ", "_")
60
- # ret = []
61
- # self.headlines.each do |k,v|
62
- # ret << v if k.downcase.start_with?(name)
63
- # next if v.headlines.empty?
64
- # if depth > 0
65
- # q = v.headline_by_name name, (depth - 1)
66
- # ret.concat q
67
- # end
68
- # end
69
- # ret
70
- # end
58
+ def headline_in_depth(name, depth = 1)
59
+ name = name.downcase.gsub(' ', '_')
60
+ ret = []
61
+
62
+ self.headlines.each do |k, v|
63
+ ret << v if k.downcase.start_with?(name)
64
+ next if v.headlines.empty?
65
+
66
+ if depth.positive?
67
+ q = v.headline_in_depth(name, (depth - 1))
68
+ ret.concat(q)
69
+ end
70
+ end
71
+ ret
72
+ end
71
73
 
72
74
  # headline exists for current headline
73
- def has_headline? name
74
- name = name.downcase.gsub(" ", "_")
75
- self.headlines.each do |k,v|
75
+ def has_headline?(name)
76
+ name = name.downcase.gsub(' ', '_')
77
+ self.headlines.each_key do |k|
76
78
  return true if k.downcase.start_with?(name)
77
79
  end
78
80
  false
79
81
  end
80
82
 
81
83
  def to_hash
82
- ret = {name: self.name, headlines: [], type: self.type}
83
- self.headlines.each do |headline_name, headline|
84
+ ret = { name:, headlines: [], type: }
85
+ self.headlines.each_value do |headline|
84
86
  ret[:headlines] << headline.to_hash
85
87
  end
86
88
  ret
87
89
  end
88
90
 
89
91
  def to_pretty_json
90
- JSON.pretty_generate self.to_hash
92
+ JSON.pretty_generate(to_hash)
91
93
  end
92
94
 
93
- protected
95
+ protected
94
96
 
95
97
  # filter nested headlines (elements) from a parent headline (by name)
96
- def nested_headlines headlines, name, original_level
98
+ def nested_headlines(headlines, _name, original_level)
97
99
  ret = {}
98
100
  init_level = nil
99
101
  # iterate headlines, skip already done onces
100
- #headlines.drop(headline_index + 1).each do |headline|
102
+ # headlines.drop(headline_index + 1).each do |headline|
101
103
  headlines.to_a.each do |name, value|
102
- level = LEVEL.index value.first.first.previous.name
103
- init_level ||= level
104
+ level = LEVEL.index(value.first.first.previous.name)
105
+ init_level ||= level
104
106
  # lower level indicate nest end
105
107
  break if level <= original_level
106
108
  break if level < init_level
107
109
  # higher level indicates nested items, these will be processed recursive
108
110
  next if init_level != level
111
+
109
112
  ret[name] = value
110
113
  end
111
114
  ret
112
115
  end
113
-
114
116
  end
115
-
116
117
  end
117
- end
118
+ end
@@ -1,38 +1,37 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Wiki
2
4
  module Api
3
-
4
5
  # Link on a wiki page (a href=xxx)
5
6
  class PageLink
6
-
7
7
  attr_accessor :element, :parent
8
8
 
9
- def initialize options={}
10
- self.element = options[:element] if options.include? :element
11
- self.parent = options[:parent] if options.include? :parent
9
+ def initialize(options = {})
10
+ self.element = options[:element] if options.include?(:element)
11
+ self.parent = options[:parent] if options.include?(:parent)
12
12
  end
13
13
 
14
14
  def to_text
15
- Wiki::Api::Util.element_to_text self.element
15
+ Wiki::Api::Util.element_to_text(element)
16
16
  end
17
17
 
18
18
  def uri
19
19
  # lookup the root parent, and get connector info
20
20
  host = Wiki::Api::Util.parent_root(self).connect.uri
21
- href_value = self.element.attributes["href"].value
22
- URI.parse "#{host}#{href_value}"
21
+ href_value = element.attributes['href'].value
22
+ URI.parse("#{host}#{href_value}")
23
23
  end
24
24
 
25
25
  def title
26
26
  # skip links with no title
27
- return "" if self.element.attributes["title"].nil?
28
- self.element.attributes["title"].value
27
+ return '' if element.attributes['title'].nil?
28
+
29
+ element.attributes['title'].value
29
30
  end
30
31
 
31
32
  def html
32
- "<a href=\"#{self.uri}\" alt=\"#{self.title}\">#{self.title}</a>"
33
+ "<a href=\"#{uri}\" alt=\"#{title}\">#{title}</a>"
33
34
  end
34
-
35
35
  end
36
-
37
36
  end
38
- end
37
+ end
@@ -1,34 +1,31 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Wiki
2
4
  module Api
3
-
4
5
  # List Items on a Page (li=xxx)
5
6
  class PageListItem
6
-
7
7
  attr_accessor :element, :parent
8
8
 
9
- def initialize options={}
10
- self.element = options[:element] if options.include? :element
11
- self.parent = options[:parent] if options.include? :parent
9
+ def initialize(options = {})
10
+ self.parent = options[:parent] if options.include?(:parent)
11
+ self.element = options[:element] if options.include?(:element)
12
12
  end
13
13
 
14
14
  def to_text
15
- Wiki::Api::Util.element_to_text self.element
15
+ Wiki::Api::Util.element_to_text(element)
16
16
  end
17
17
 
18
18
  def links
19
- self.search("a").map do |a|
20
- PageLink.new parent: self, element: a
19
+ search('a').map do |a|
20
+ PageLink.new(parent: self, element: a)
21
21
  end
22
22
  end
23
23
 
24
24
  protected
25
25
 
26
26
  def search *paths
27
- self.element.search(*paths)
27
+ element.search(*paths)
28
28
  end
29
-
30
-
31
29
  end
32
-
33
30
  end
34
- end
31
+ end
data/lib/wiki/api/util.rb CHANGED
@@ -1,46 +1,44 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Wiki
2
4
  module Api
3
-
4
5
  class Util
5
-
6
6
  class << self
7
+ def element_to_text(element)
8
+ raise('not an element') unless element.is_a?(Nokogiri::XML::Element)
7
9
 
8
- def element_to_text element
9
- raise "not an element" unless element.is_a? Nokogiri::XML::Element
10
- self.clean_text element.text
10
+ clean_text(element.text)
11
11
  end
12
12
 
13
- def element_filter_lists element
14
- raise "not an element" unless element.is_a? Nokogiri::XML::Element
13
+ def element_filter_lists(element)
14
+ raise('not an element') unless element.is_a?(Nokogiri::XML::Element)
15
+
15
16
  result = {}
16
- element.search("li").each_with_index do |li, i|
17
+ element.search('li').each_with_index do |li, i|
17
18
  li.children.each do |child|
18
19
  result[i] ||= []
19
- result[i] << self.clean_text(child.text)
20
+ result[i] << clean_text(child.text)
20
21
  end
21
22
  end
22
- result.map{ |k,v| v.join("") }
23
+ result.map { |_k, v| v.join('') }
23
24
  end
24
25
 
25
- def parent_root current_object
26
+ def parent_root(current_object)
26
27
  current = current_object
27
- while true do
28
+ loop do
28
29
  break if current.parent.nil?
30
+
29
31
  current = current.parent
30
32
  end
31
33
  current
32
34
  end
33
35
 
34
-
35
-
36
-
37
36
  protected
38
- def clean_text text
39
- text.gsub(/\n/, " ").squeeze(" ").gsub(/\s(\W)/, '\1').gsub(/(\W)\s/, '\1 ').strip
40
- end
41
37
 
38
+ def clean_text(text)
39
+ text.gsub(/\n/, ' ').squeeze(' ').gsub(/\s(\W)/, '\1').gsub(/(\W)\s/, '\1 ').strip
40
+ end
42
41
  end
43
-
44
42
  end
45
43
  end
46
- end
44
+ end
@@ -1,5 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Wiki
2
4
  module Api
3
- VERSION = "0.1.0"
5
+ VERSION = '0.1.2'
4
6
  end
5
7
  end
data/lib/wiki/api.rb CHANGED
@@ -1,12 +1,13 @@
1
- require File.expand_path(File.dirname(__FILE__) + "/api/version")
2
- require File.expand_path(File.dirname(__FILE__) + "/api/connect")
3
- require File.expand_path(File.dirname(__FILE__) + "/api/page")
4
- require File.expand_path(File.dirname(__FILE__) + "/api/page_headline")
5
- require File.expand_path(File.dirname(__FILE__) + "/api/page_block")
6
- require File.expand_path(File.dirname(__FILE__) + "/api/page_list_item")
7
- require File.expand_path(File.dirname(__FILE__) + "/api/page_link")
8
- require File.expand_path(File.dirname(__FILE__) + "/api/util")
1
+ # frozen_string_literal: true
9
2
 
3
+ require File.expand_path("#{File.dirname(__FILE__)}/api/version")
4
+ require File.expand_path("#{File.dirname(__FILE__)}/api/connect")
5
+ require File.expand_path("#{File.dirname(__FILE__)}/api/page")
6
+ require File.expand_path("#{File.dirname(__FILE__)}/api/page_headline")
7
+ require File.expand_path("#{File.dirname(__FILE__)}/api/page_block")
8
+ require File.expand_path("#{File.dirname(__FILE__)}/api/page_list_item")
9
+ require File.expand_path("#{File.dirname(__FILE__)}/api/page_link")
10
+ require File.expand_path("#{File.dirname(__FILE__)}/api/util")
10
11
 
11
12
  module Wiki
12
13
  module Api
data/test/test_helper.rb CHANGED
@@ -1,8 +1,5 @@
1
+ # frozen_string_literal: true
1
2
 
2
- class ActiveSupport::TestCase
3
- setup :global_setup
4
-
5
- def global_setup
6
- end
7
- end
8
-
3
+ require 'rubygems'
4
+ require 'test/unit'
5
+ require File.expand_path("#{File.dirname(__FILE__)}/../lib/wiki/api")
@@ -1,51 +1,44 @@
1
- require 'rubygems'
2
- require 'test/unit'
3
- require File.expand_path(File.dirname(__FILE__) + "/../../lib/wiki/api")
1
+ # frozen_string_literal: true
2
+
3
+ require 'test_helper'
4
+ require 'pry'
4
5
 
5
6
  #
6
7
  # Testing the connection to https://www.mediawiki.org/wiki/API:Main_page
7
8
  #
8
9
 
9
10
  class WikiConnect < Test::Unit::TestCase
10
-
11
- CONFIG = { uri: "http://en.wiktionary.org" }
11
+ CONFIG = { uri: 'https://en.wiktionary.org' }.freeze
12
12
 
13
13
  def setup
14
14
  Wiki::Api::Connect.config = CONFIG
15
15
  end
16
16
 
17
- def teardown
18
- end
17
+ def teardown; end
19
18
 
20
19
  def test_connection_wiktionary
21
- c = Wiki::Api::Connect.new uri: "http://en.wiktionary.org"
20
+ c = Wiki::Api::Connect.new(uri: 'http://en.wiktionary.org')
22
21
  ret = c.connect
23
- assert ret.is_a?(Net::HTTPOK), "invalid response http"
22
+ assert(ret.is_a?(Net::HTTPMovedPermanently), 'invalid response http')
24
23
  end
25
24
 
26
25
  def test_connection_https_wiktionary
27
- c = Wiki::Api::Connect.new uri: "https://en.wiktionary.org"
26
+ c = Wiki::Api::Connect.new(uri: 'https://en.wiktionary.org')
28
27
  ret = c.connect
29
- assert ret.is_a?(Net::HTTPOK), "invalid response https"
28
+ assert(ret.is_a?(Net::HTTPOK), 'invalid response https')
30
29
  end
31
30
 
32
31
  def test_page_get
33
- begin
34
- c = Wiki::Api::Connect.new
35
- c.page "Wiktionary:Welcome,_newcomers"
36
- rescue Exception => e
37
- assert false, "expected valid page #{e.message}"
38
- end
32
+ c = Wiki::Api::Connect.new
33
+ c.page('Wiktionary:Welcome,_newcomers')
34
+ rescue Exception => e
35
+ assert(false, "expected valid page #{e.message}")
39
36
  end
40
37
 
41
38
  def test_page_get_non_exist
42
- begin
43
- c = Wiki::Api::Connect.new
44
- response = c.page "asfsldkfjjlkanv98yhok"
45
- rescue Exception => e
46
- assert (e.message == "missingtitle"), "expected invalid page #{e.message}"
47
- end
39
+ c = Wiki::Api::Connect.new
40
+ c.page('asfsldkfjjlkanv98yhok')
41
+ rescue Exception => e
42
+ assert((e.message == 'missingtitle'), "expected invalid page #{e.message}")
48
43
  end
49
-
50
-
51
44
  end