oald_parser 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -16,7 +16,7 @@ module OaldParser
16
16
  def self.create_facade
17
17
  downloader = PageDownloader.new('http://www.oxfordadvancedlearnersdictionary.com/dictionary')
18
18
  parser = PageParser.new
19
- formatter = Formatter.new(items: 5)
19
+ formatter = Formatter.new
20
20
  extractor = WordExtractor.new
21
21
  Facade.new(downloader, parser, formatter, extractor)
22
22
  end
@@ -2,36 +2,29 @@ require 'nokogiri'
2
2
 
3
3
  module OaldParser
4
4
  class Formatter
5
- def initialize(options)
6
- @options = options
7
- end
8
-
9
5
  def format(page)
10
- if !page.blocks.empty?
11
- format_blocks(page.blocks)
12
- else
13
- format_items(page.items)
14
- end
6
+ format_blocks(page.blocks)
15
7
  end
16
8
 
17
9
  private
18
- def format_blocks(blocks, limit = 1000)
10
+ def format_blocks(blocks)
19
11
  blocks.collect do |block|
20
12
  res = ''
21
- res += block.text.upcase
22
- res += "\n"
23
- res += '-' * 20
24
- res += "\n"
13
+ unless block.text.empty?
14
+ res += block.text.upcase
15
+ res += "\n"
16
+ res += '-' * 20
17
+ res += "\n"
18
+ end
25
19
  res += format_items(block.items)
26
20
  res
27
21
  end.join("\n\n")
28
22
  end
29
23
 
30
- def format_items(items, limit = 1000)
24
+ def format_items(items)
31
25
  items.collect do |item|
32
- res = ''
33
- res += item.text
34
- if !item.examples.empty?
26
+ res = item.text
27
+ unless item.examples.empty?
35
28
  res += "\n"
36
29
  res += format_examples(item.examples)
37
30
  end
@@ -40,7 +33,7 @@ module OaldParser
40
33
  end
41
34
 
42
35
  def format_examples(examples)
43
- examples.collect {|e| "+ #{e}"}.join("\n")
36
+ examples.collect{|e| "+ #{e}"}.join("\n")
44
37
  end
45
38
  end
46
39
  end
@@ -2,9 +2,13 @@ require 'nokogiri'
2
2
 
3
3
  module OaldParser
4
4
  class Page
5
- attr_reader :blocks, :items
6
- def initialize(blocks, items)
7
- @blocks, @items = blocks, items
5
+ attr_reader :blocks
6
+ def initialize(blocks)
7
+ @blocks = blocks
8
+ end
9
+
10
+ def self.empty
11
+ Page.new []
8
12
  end
9
13
  end
10
14
 
@@ -26,26 +30,28 @@ module OaldParser
26
30
  def parse(page)
27
31
  parsed = Nokogiri::HTML(page)
28
32
  if blocks_on_page? parsed
29
- Page.new(parse_block(parsed), [])
30
- elsif def_on_page? parsed
31
- Page.new([], parse_def(parsed))
33
+ Page.new(parse_block(parsed))
32
34
  else
33
- Page.new([], parse_items(parsed))
35
+ parse_page_from_items(parsed)
34
36
  end
35
37
  end
36
38
 
37
39
  private
38
- def blocks_on_page?(page)
39
- page.css('div.sd-g').first
40
+ def parse_page_from_items(parsed)
41
+ items = parse_items(parsed)
42
+ if items.empty?
43
+ Page.empty
44
+ else
45
+ Page.new([Block.new("", items)])
46
+ end
40
47
  end
41
48
 
42
- def def_on_page?(page)
43
- page.css('div.h-g').first
49
+ def blocks_on_page?(page)
50
+ page.css('div.sd-g').first
44
51
  end
45
52
 
46
53
  def parse_block(page)
47
- block_nodes = page.css('div.sd-g')
48
- block_nodes.collect do |block|
54
+ page.css('div.sd-g').collect do |block|
49
55
  block_text = all_except(block, 'n-g')
50
56
  items = parse_items(block)
51
57
  Block.new(block_text, items)
@@ -53,19 +59,16 @@ module OaldParser
53
59
  end
54
60
 
55
61
  def parse_items(block)
56
- item_nodes = block.css('span.n-g')
57
- item_nodes.collect do |item|
62
+ items = block.css('span.n-g').collect do |item|
58
63
  item_text = all_except(item, 'x-g')
59
64
  Item.new(item_text, parse_examples(item))
60
- end
61
- end
65
+ end
62
66
 
63
- def parse_def(page)
64
- item_nodes = page.css('div.h-g')
65
- item_nodes.collect do |item|
66
- item_text = item.css('div.def_block').first.text.strip
67
- Item.new(item_text, parse_examples(item))
68
- end
67
+ if block.css('div.def_block').first
68
+ item_text = block.css('div.def_block').first.text.strip
69
+ items << Item.new(item_text, parse_examples(block))
70
+ end
71
+ items
69
72
  end
70
73
 
71
74
  def parse_examples(item)
data/lib/oald_parser.rb CHANGED
@@ -8,9 +8,13 @@ require_relative 'oald_parser/page_parser'
8
8
  #include OaldParser
9
9
  #
10
10
  #downloader = PageDownloader.new("http://www.oxfordadvancedlearnersdictionary.com/dictionary")
11
- #page = downloader.download("prevent")
11
+ #page = downloader.download("a")
12
+ ##puts page
13
+ #
12
14
  #parser = PageParser.new
13
15
  #parsed = parser.parse(page)
16
+ #puts parsed.inspect
17
+ #
14
18
  #formatter = Formatter.new(items: 15)
15
19
  #puts formatter.format(parsed)
16
20
 
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 2
8
- - 1
9
- version: 0.2.1
8
+ - 2
9
+ version: 0.2.2
10
10
  platform: ruby
11
11
  authors:
12
12
  - Victor Savkin