oald_parser 0.2.1 → 0.2.2

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -16,7 +16,7 @@ module OaldParser
16
16
  def self.create_facade
17
17
  downloader = PageDownloader.new('http://www.oxfordadvancedlearnersdictionary.com/dictionary')
18
18
  parser = PageParser.new
19
- formatter = Formatter.new(items: 5)
19
+ formatter = Formatter.new
20
20
  extractor = WordExtractor.new
21
21
  Facade.new(downloader, parser, formatter, extractor)
22
22
  end
@@ -2,36 +2,29 @@ require 'nokogiri'
2
2
 
3
3
  module OaldParser
4
4
  class Formatter
5
- def initialize(options)
6
- @options = options
7
- end
8
-
9
5
  def format(page)
10
- if !page.blocks.empty?
11
- format_blocks(page.blocks)
12
- else
13
- format_items(page.items)
14
- end
6
+ format_blocks(page.blocks)
15
7
  end
16
8
 
17
9
  private
18
- def format_blocks(blocks, limit = 1000)
10
+ def format_blocks(blocks)
19
11
  blocks.collect do |block|
20
12
  res = ''
21
- res += block.text.upcase
22
- res += "\n"
23
- res += '-' * 20
24
- res += "\n"
13
+ unless block.text.empty?
14
+ res += block.text.upcase
15
+ res += "\n"
16
+ res += '-' * 20
17
+ res += "\n"
18
+ end
25
19
  res += format_items(block.items)
26
20
  res
27
21
  end.join("\n\n")
28
22
  end
29
23
 
30
- def format_items(items, limit = 1000)
24
+ def format_items(items)
31
25
  items.collect do |item|
32
- res = ''
33
- res += item.text
34
- if !item.examples.empty?
26
+ res = item.text
27
+ unless item.examples.empty?
35
28
  res += "\n"
36
29
  res += format_examples(item.examples)
37
30
  end
@@ -40,7 +33,7 @@ module OaldParser
40
33
  end
41
34
 
42
35
  def format_examples(examples)
43
- examples.collect {|e| "+ #{e}"}.join("\n")
36
+ examples.collect{|e| "+ #{e}"}.join("\n")
44
37
  end
45
38
  end
46
39
  end
@@ -2,9 +2,13 @@ require 'nokogiri'
2
2
 
3
3
  module OaldParser
4
4
  class Page
5
- attr_reader :blocks, :items
6
- def initialize(blocks, items)
7
- @blocks, @items = blocks, items
5
+ attr_reader :blocks
6
+ def initialize(blocks)
7
+ @blocks = blocks
8
+ end
9
+
10
+ def self.empty
11
+ Page.new []
8
12
  end
9
13
  end
10
14
 
@@ -26,26 +30,28 @@ module OaldParser
26
30
  def parse(page)
27
31
  parsed = Nokogiri::HTML(page)
28
32
  if blocks_on_page? parsed
29
- Page.new(parse_block(parsed), [])
30
- elsif def_on_page? parsed
31
- Page.new([], parse_def(parsed))
33
+ Page.new(parse_block(parsed))
32
34
  else
33
- Page.new([], parse_items(parsed))
35
+ parse_page_from_items(parsed)
34
36
  end
35
37
  end
36
38
 
37
39
  private
38
- def blocks_on_page?(page)
39
- page.css('div.sd-g').first
40
+ def parse_page_from_items(parsed)
41
+ items = parse_items(parsed)
42
+ if items.empty?
43
+ Page.empty
44
+ else
45
+ Page.new([Block.new("", items)])
46
+ end
40
47
  end
41
48
 
42
- def def_on_page?(page)
43
- page.css('div.h-g').first
49
+ def blocks_on_page?(page)
50
+ page.css('div.sd-g').first
44
51
  end
45
52
 
46
53
  def parse_block(page)
47
- block_nodes = page.css('div.sd-g')
48
- block_nodes.collect do |block|
54
+ page.css('div.sd-g').collect do |block|
49
55
  block_text = all_except(block, 'n-g')
50
56
  items = parse_items(block)
51
57
  Block.new(block_text, items)
@@ -53,19 +59,16 @@ module OaldParser
53
59
  end
54
60
 
55
61
  def parse_items(block)
56
- item_nodes = block.css('span.n-g')
57
- item_nodes.collect do |item|
62
+ items = block.css('span.n-g').collect do |item|
58
63
  item_text = all_except(item, 'x-g')
59
64
  Item.new(item_text, parse_examples(item))
60
- end
61
- end
65
+ end
62
66
 
63
- def parse_def(page)
64
- item_nodes = page.css('div.h-g')
65
- item_nodes.collect do |item|
66
- item_text = item.css('div.def_block').first.text.strip
67
- Item.new(item_text, parse_examples(item))
68
- end
67
+ if block.css('div.def_block').first
68
+ item_text = block.css('div.def_block').first.text.strip
69
+ items << Item.new(item_text, parse_examples(block))
70
+ end
71
+ items
69
72
  end
70
73
 
71
74
  def parse_examples(item)
data/lib/oald_parser.rb CHANGED
@@ -8,9 +8,13 @@ require_relative 'oald_parser/page_parser'
8
8
  #include OaldParser
9
9
  #
10
10
  #downloader = PageDownloader.new("http://www.oxfordadvancedlearnersdictionary.com/dictionary")
11
- #page = downloader.download("prevent")
11
+ #page = downloader.download("a")
12
+ ##puts page
13
+ #
12
14
  #parser = PageParser.new
13
15
  #parsed = parser.parse(page)
16
+ #puts parsed.inspect
17
+ #
14
18
  #formatter = Formatter.new(items: 15)
15
19
  #puts formatter.format(parsed)
16
20
 
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 2
8
- - 1
9
- version: 0.2.1
8
+ - 2
9
+ version: 0.2.2
10
10
  platform: ruby
11
11
  authors:
12
12
  - Victor Savkin