wiki-api 0.1.0 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,27 +1,28 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Wiki
2
4
  module Api
3
-
4
5
  # Collection of elements for segmented per headline
5
6
  class PageBlock
6
-
7
7
  attr_accessor :elements, :parent
8
8
 
9
- def initialize options={}
10
- self.parent = options[:parent] if options.include? :parent
9
+ def initialize(options = {})
10
+ self.parent = options[:parent] if options.include?(:parent)
11
11
  self.elements = []
12
12
  end
13
13
 
14
- def << value
14
+ def <<(value)
15
15
  # value.first.previous.name
16
- self.elements << value
16
+ elements << value
17
17
  end
18
18
 
19
19
  def to_texts
20
20
  texts = []
21
- self.elements.flatten.each do |element|
22
- text = Wiki::Api::Util.element_to_text element if element.is_a? Nokogiri::XML::Element
21
+ elements.flatten.each do |element|
22
+ text = Wiki::Api::Util.element_to_text(element) if element.is_a?(Nokogiri::XML::Element)
23
23
  next if text.nil?
24
24
  next if text.empty?
25
+
25
26
  texts << text
26
27
  end
27
28
  texts
@@ -29,27 +30,25 @@ module Wiki
29
30
 
30
31
  def list_items
31
32
  # TODO: perhaps we should wrap the elements with objects, and request a li per element??
32
- self.search("li").map do |list_item|
33
- PageListItem.new parent: self, element: list_item
33
+ search('li').map do |list_item|
34
+ PageListItem.new(parent: self, element: list_item)
34
35
  end
35
36
  end
36
37
 
37
38
  def links
38
39
  # TODO: perhaps we should wrap the elements with objects, and request a li per element??
39
- self.search("a").map do |a|
40
- PageLink.new parent: self, element: a
40
+ search('a').map do |a|
41
+ PageLink.new(parent: self, element: a)
41
42
  end
42
43
  end
43
44
 
44
45
  protected
45
46
 
46
47
  def search *paths
47
- self.elements.flatten.flat_map do |element|
48
+ elements.flatten.flat_map do |element|
48
49
  element.search(*paths)
49
- end.reject{|t| t.nil?}
50
+ end.reject(&:nil?)
50
51
  end
51
-
52
52
  end
53
-
54
53
  end
55
- end
54
+ end
@@ -1,117 +1,118 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Wiki
2
4
  module Api
3
-
4
5
  # Headline for a page (class="mw-healine")
5
6
  class PageHeadline
6
-
7
7
  require 'json'
8
8
 
9
- LEVEL = ["text", "h1", "h2", "h3", "h4", "h5", "h6"]
9
+ LEVEL = %w[text h1 h2 h3 h4 h5 h6].freeze
10
10
 
11
11
  attr_accessor :name, :block, :parent, :headlines, :level
12
12
 
13
- def initialize options={}
14
- self.name = options[:name] if options.include? :name
15
- self.parent = options[:parent] if options.include? :parent
16
- self.level = options[:level] if options.include? :level
13
+ def initialize(options = {})
14
+ self.name = options[:name] if options.include?(:name)
15
+ self.parent = options[:parent] if options.include?(:parent)
16
+ self.level = options[:level] if options.include?(:level)
17
17
  options[:headlines] ||= []
18
18
  self.headlines ||= {}
19
19
 
20
20
  # store elements in a block
21
- self.block = PageBlock.new parent: self
22
- if options[:headlines].include? self.name
23
- options[:headlines][self.name].each do |element|
24
- self.block << element
21
+ self.block = PageBlock.new(parent: self)
22
+ if options[:headlines].include?(name)
23
+ options[:headlines][name].each do |element|
24
+ block << element
25
25
  end
26
26
  end
27
27
 
28
28
  # collect nested headlines
29
29
  headlines = options[:headlines]
30
30
  # remove self from list
31
- headlines.delete self.name
32
- nested_headlines = self.nested_headlines headlines, self.name, self.level
31
+ headlines.delete(name)
32
+ nested_headlines = self.nested_headlines(headlines, name, level)
33
33
 
34
34
  # iterate nested headlines, and call recursive
35
35
  nested_headlines.each do |headline_name, value|
36
- level = LEVEL.index value.first.first.previous.name
37
- self.headlines[headline_name] = (PageHeadline.new parent: self, name: headline_name, headlines: headlines, level: level)
36
+ level = LEVEL.index(value.first.first.previous.name)
37
+ self.headlines[headline_name] =
38
+ PageHeadline.new(parent: self, name: headline_name, headlines:, level:)
38
39
  end
39
40
  end
40
41
 
41
42
  def elements
42
- self.block.elements
43
+ block.elements
43
44
  end
44
45
 
45
46
  def type
46
- self.block.elements.first.first.previous.name
47
+ block.elements.first.first.previous.name
47
48
  end
48
49
 
49
50
  # get headline by name
50
- def headline name
51
- name = name.downcase.gsub(" ", "_")
52
- self.headlines.reject do |k,v|
53
- !k.downcase.start_with?(name)
54
- end.values()
51
+ def headline(name)
52
+ name = name.downcase.gsub(' ', '_')
53
+ self.headlines.select do |k, _v|
54
+ k.downcase.start_with?(name)
55
+ end.values
55
56
  end
56
57
 
57
- # recursive headline search
58
- # def headline_by_name name, depth = 1
59
- # name = name.downcase.gsub(" ", "_")
60
- # ret = []
61
- # self.headlines.each do |k,v|
62
- # ret << v if k.downcase.start_with?(name)
63
- # next if v.headlines.empty?
64
- # if depth > 0
65
- # q = v.headline_by_name name, (depth - 1)
66
- # ret.concat q
67
- # end
68
- # end
69
- # ret
70
- # end
58
+ def headline_in_depth(name, depth = 1)
59
+ name = name.downcase.gsub(' ', '_')
60
+ ret = []
61
+
62
+ self.headlines.each do |k, v|
63
+ ret << v if k.downcase.start_with?(name)
64
+ next if v.headlines.empty?
65
+
66
+ if depth.positive?
67
+ q = v.headline_in_depth(name, (depth - 1))
68
+ ret.concat(q)
69
+ end
70
+ end
71
+ ret
72
+ end
71
73
 
72
74
  # headline exists for current headline
73
- def has_headline? name
74
- name = name.downcase.gsub(" ", "_")
75
- self.headlines.each do |k,v|
75
+ def has_headline?(name)
76
+ name = name.downcase.gsub(' ', '_')
77
+ self.headlines.each_key do |k|
76
78
  return true if k.downcase.start_with?(name)
77
79
  end
78
80
  false
79
81
  end
80
82
 
81
83
  def to_hash
82
- ret = {name: self.name, headlines: [], type: self.type}
83
- self.headlines.each do |headline_name, headline|
84
+ ret = { name:, headlines: [], type: }
85
+ self.headlines.each_value do |headline|
84
86
  ret[:headlines] << headline.to_hash
85
87
  end
86
88
  ret
87
89
  end
88
90
 
89
91
  def to_pretty_json
90
- JSON.pretty_generate self.to_hash
92
+ JSON.pretty_generate(to_hash)
91
93
  end
92
94
 
93
- protected
95
+ protected
94
96
 
95
97
  # filter nested headlines (elements) from a parent headline (by name)
96
- def nested_headlines headlines, name, original_level
98
+ def nested_headlines(headlines, _name, original_level)
97
99
  ret = {}
98
100
  init_level = nil
99
101
  # iterate headlines, skip already done onces
100
- #headlines.drop(headline_index + 1).each do |headline|
102
+ # headlines.drop(headline_index + 1).each do |headline|
101
103
  headlines.to_a.each do |name, value|
102
- level = LEVEL.index value.first.first.previous.name
103
- init_level ||= level
104
+ level = LEVEL.index(value.first.first.previous.name)
105
+ init_level ||= level
104
106
  # lower level indicate nest end
105
107
  break if level <= original_level
106
108
  break if level < init_level
107
109
  # higher level indicates nested items, these will be processed recursive
108
110
  next if init_level != level
111
+
109
112
  ret[name] = value
110
113
  end
111
114
  ret
112
115
  end
113
-
114
116
  end
115
-
116
117
  end
117
- end
118
+ end
@@ -1,38 +1,37 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Wiki
2
4
  module Api
3
-
4
5
  # Link on a wiki page (a href=xxx)
5
6
  class PageLink
6
-
7
7
  attr_accessor :element, :parent
8
8
 
9
- def initialize options={}
10
- self.element = options[:element] if options.include? :element
11
- self.parent = options[:parent] if options.include? :parent
9
+ def initialize(options = {})
10
+ self.element = options[:element] if options.include?(:element)
11
+ self.parent = options[:parent] if options.include?(:parent)
12
12
  end
13
13
 
14
14
  def to_text
15
- Wiki::Api::Util.element_to_text self.element
15
+ Wiki::Api::Util.element_to_text(element)
16
16
  end
17
17
 
18
18
  def uri
19
19
  # lookup the root parent, and get connector info
20
20
  host = Wiki::Api::Util.parent_root(self).connect.uri
21
- href_value = self.element.attributes["href"].value
22
- URI.parse "#{host}#{href_value}"
21
+ href_value = element.attributes['href'].value
22
+ URI.parse("#{host}#{href_value}")
23
23
  end
24
24
 
25
25
  def title
26
26
  # skip links with no title
27
- return "" if self.element.attributes["title"].nil?
28
- self.element.attributes["title"].value
27
+ return '' if element.attributes['title'].nil?
28
+
29
+ element.attributes['title'].value
29
30
  end
30
31
 
31
32
  def html
32
- "<a href=\"#{self.uri}\" alt=\"#{self.title}\">#{self.title}</a>"
33
+ "<a href=\"#{uri}\" alt=\"#{title}\">#{title}</a>"
33
34
  end
34
-
35
35
  end
36
-
37
36
  end
38
- end
37
+ end
@@ -1,34 +1,31 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Wiki
2
4
  module Api
3
-
4
5
  # List Items on a Page (li=xxx)
5
6
  class PageListItem
6
-
7
7
  attr_accessor :element, :parent
8
8
 
9
- def initialize options={}
10
- self.element = options[:element] if options.include? :element
11
- self.parent = options[:parent] if options.include? :parent
9
+ def initialize(options = {})
10
+ self.parent = options[:parent] if options.include?(:parent)
11
+ self.element = options[:element] if options.include?(:element)
12
12
  end
13
13
 
14
14
  def to_text
15
- Wiki::Api::Util.element_to_text self.element
15
+ Wiki::Api::Util.element_to_text(element)
16
16
  end
17
17
 
18
18
  def links
19
- self.search("a").map do |a|
20
- PageLink.new parent: self, element: a
19
+ search('a').map do |a|
20
+ PageLink.new(parent: self, element: a)
21
21
  end
22
22
  end
23
23
 
24
24
  protected
25
25
 
26
26
  def search *paths
27
- self.element.search(*paths)
27
+ element.search(*paths)
28
28
  end
29
-
30
-
31
29
  end
32
-
33
30
  end
34
- end
31
+ end
data/lib/wiki/api/util.rb CHANGED
@@ -1,46 +1,44 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Wiki
2
4
  module Api
3
-
4
5
  class Util
5
-
6
6
  class << self
7
+ def element_to_text(element)
8
+ raise('not an element') unless element.is_a?(Nokogiri::XML::Element)
7
9
 
8
- def element_to_text element
9
- raise "not an element" unless element.is_a? Nokogiri::XML::Element
10
- self.clean_text element.text
10
+ clean_text(element.text)
11
11
  end
12
12
 
13
- def element_filter_lists element
14
- raise "not an element" unless element.is_a? Nokogiri::XML::Element
13
+ def element_filter_lists(element)
14
+ raise('not an element') unless element.is_a?(Nokogiri::XML::Element)
15
+
15
16
  result = {}
16
- element.search("li").each_with_index do |li, i|
17
+ element.search('li').each_with_index do |li, i|
17
18
  li.children.each do |child|
18
19
  result[i] ||= []
19
- result[i] << self.clean_text(child.text)
20
+ result[i] << clean_text(child.text)
20
21
  end
21
22
  end
22
- result.map{ |k,v| v.join("") }
23
+ result.map { |_k, v| v.join('') }
23
24
  end
24
25
 
25
- def parent_root current_object
26
+ def parent_root(current_object)
26
27
  current = current_object
27
- while true do
28
+ loop do
28
29
  break if current.parent.nil?
30
+
29
31
  current = current.parent
30
32
  end
31
33
  current
32
34
  end
33
35
 
34
-
35
-
36
-
37
36
  protected
38
- def clean_text text
39
- text.gsub(/\n/, " ").squeeze(" ").gsub(/\s(\W)/, '\1').gsub(/(\W)\s/, '\1 ').strip
40
- end
41
37
 
38
+ def clean_text(text)
39
+ text.gsub(/\n/, ' ').squeeze(' ').gsub(/\s(\W)/, '\1').gsub(/(\W)\s/, '\1 ').strip
40
+ end
42
41
  end
43
-
44
42
  end
45
43
  end
46
- end
44
+ end
@@ -1,5 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Wiki
2
4
  module Api
3
- VERSION = "0.1.0"
5
+ VERSION = '0.1.2'
4
6
  end
5
7
  end
data/lib/wiki/api.rb CHANGED
@@ -1,12 +1,13 @@
1
- require File.expand_path(File.dirname(__FILE__) + "/api/version")
2
- require File.expand_path(File.dirname(__FILE__) + "/api/connect")
3
- require File.expand_path(File.dirname(__FILE__) + "/api/page")
4
- require File.expand_path(File.dirname(__FILE__) + "/api/page_headline")
5
- require File.expand_path(File.dirname(__FILE__) + "/api/page_block")
6
- require File.expand_path(File.dirname(__FILE__) + "/api/page_list_item")
7
- require File.expand_path(File.dirname(__FILE__) + "/api/page_link")
8
- require File.expand_path(File.dirname(__FILE__) + "/api/util")
1
+ # frozen_string_literal: true
9
2
 
3
+ require File.expand_path("#{File.dirname(__FILE__)}/api/version")
4
+ require File.expand_path("#{File.dirname(__FILE__)}/api/connect")
5
+ require File.expand_path("#{File.dirname(__FILE__)}/api/page")
6
+ require File.expand_path("#{File.dirname(__FILE__)}/api/page_headline")
7
+ require File.expand_path("#{File.dirname(__FILE__)}/api/page_block")
8
+ require File.expand_path("#{File.dirname(__FILE__)}/api/page_list_item")
9
+ require File.expand_path("#{File.dirname(__FILE__)}/api/page_link")
10
+ require File.expand_path("#{File.dirname(__FILE__)}/api/util")
10
11
 
11
12
  module Wiki
12
13
  module Api
data/test/test_helper.rb CHANGED
@@ -1,8 +1,5 @@
1
+ # frozen_string_literal: true
1
2
 
2
- class ActiveSupport::TestCase
3
- setup :global_setup
4
-
5
- def global_setup
6
- end
7
- end
8
-
3
+ require 'rubygems'
4
+ require 'test/unit'
5
+ require File.expand_path("#{File.dirname(__FILE__)}/../lib/wiki/api")
@@ -1,51 +1,44 @@
1
- require 'rubygems'
2
- require 'test/unit'
3
- require File.expand_path(File.dirname(__FILE__) + "/../../lib/wiki/api")
1
+ # frozen_string_literal: true
2
+
3
+ require 'test_helper'
4
+ require 'pry'
4
5
 
5
6
  #
6
7
  # Testing the connection to https://www.mediawiki.org/wiki/API:Main_page
7
8
  #
8
9
 
9
10
  class WikiConnect < Test::Unit::TestCase
10
-
11
- CONFIG = { uri: "http://en.wiktionary.org" }
11
+ CONFIG = { uri: 'https://en.wiktionary.org' }.freeze
12
12
 
13
13
  def setup
14
14
  Wiki::Api::Connect.config = CONFIG
15
15
  end
16
16
 
17
- def teardown
18
- end
17
+ def teardown; end
19
18
 
20
19
  def test_connection_wiktionary
21
- c = Wiki::Api::Connect.new uri: "http://en.wiktionary.org"
20
+ c = Wiki::Api::Connect.new(uri: 'http://en.wiktionary.org')
22
21
  ret = c.connect
23
- assert ret.is_a?(Net::HTTPOK), "invalid response http"
22
+ assert(ret.is_a?(Net::HTTPMovedPermanently), 'invalid response http')
24
23
  end
25
24
 
26
25
  def test_connection_https_wiktionary
27
- c = Wiki::Api::Connect.new uri: "https://en.wiktionary.org"
26
+ c = Wiki::Api::Connect.new(uri: 'https://en.wiktionary.org')
28
27
  ret = c.connect
29
- assert ret.is_a?(Net::HTTPOK), "invalid response https"
28
+ assert(ret.is_a?(Net::HTTPOK), 'invalid response https')
30
29
  end
31
30
 
32
31
  def test_page_get
33
- begin
34
- c = Wiki::Api::Connect.new
35
- c.page "Wiktionary:Welcome,_newcomers"
36
- rescue Exception => e
37
- assert false, "expected valid page #{e.message}"
38
- end
32
+ c = Wiki::Api::Connect.new
33
+ c.page('Wiktionary:Welcome,_newcomers')
34
+ rescue Exception => e
35
+ assert(false, "expected valid page #{e.message}")
39
36
  end
40
37
 
41
38
  def test_page_get_non_exist
42
- begin
43
- c = Wiki::Api::Connect.new
44
- response = c.page "asfsldkfjjlkanv98yhok"
45
- rescue Exception => e
46
- assert (e.message == "missingtitle"), "expected invalid page #{e.message}"
47
- end
39
+ c = Wiki::Api::Connect.new
40
+ c.page('asfsldkfjjlkanv98yhok')
41
+ rescue Exception => e
42
+ assert((e.message == 'missingtitle'), "expected invalid page #{e.message}")
48
43
  end
49
-
50
-
51
44
  end