oald_parser 0.2 → 0.2.1

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -25,8 +25,10 @@ module OaldParser
25
25
  class PageParser
26
26
  def parse(page)
27
27
  parsed = Nokogiri::HTML(page)
28
- if blocks_on_page?(parsed)
28
+ if blocks_on_page? parsed
29
29
  Page.new(parse_block(parsed), [])
30
+ elsif def_on_page? parsed
31
+ Page.new([], parse_def(parsed))
30
32
  else
31
33
  Page.new([], parse_items(parsed))
32
34
  end
@@ -37,6 +39,10 @@ module OaldParser
37
39
  page.css('div.sd-g').first
38
40
  end
39
41
 
42
+ def def_on_page?(page)
43
+ page.css('div.h-g').first
44
+ end
45
+
40
46
  def parse_block(page)
41
47
  block_nodes = page.css('div.sd-g')
42
48
  block_nodes.collect do |block|
@@ -50,15 +56,26 @@ module OaldParser
50
56
  item_nodes = block.css('span.n-g')
51
57
  item_nodes.collect do |item|
52
58
  item_text = all_except(item, 'x-g')
53
- example_nodes = item.css('span.x-g')
54
- examples = example_nodes.collect{|e| e.text.strip}
55
- Item.new(item_text, examples)
59
+ Item.new(item_text, parse_examples(item))
56
60
  end
57
61
  end
58
62
 
63
+ def parse_def(page)
64
+ item_nodes = page.css('div.h-g')
65
+ item_nodes.collect do |item|
66
+ item_text = item.css('div.def_block').first.text.strip
67
+ Item.new(item_text, parse_examples(item))
68
+ end
69
+ end
70
+
71
+ def parse_examples(item)
72
+ example_nodes = item.css('span.x-g')
73
+ example_nodes.collect{|e| e.text.strip}
74
+ end
75
+
59
76
  def all_except(item, class_name)
60
77
  elements = item.children.find_all do |c|
61
- !(c.name == 'span' && c[:class] == class_name)
78
+ !(['span', 'div'].include?(c.name) && c[:class] == class_name)
62
79
  end
63
80
  elements.collect{|e|e.text}.join('').strip
64
81
  end
data/lib/oald_parser.rb CHANGED
@@ -8,7 +8,7 @@ require_relative 'oald_parser/page_parser'
8
8
  #include OaldParser
9
9
  #
10
10
  #downloader = PageDownloader.new("http://www.oxfordadvancedlearnersdictionary.com/dictionary")
11
- #page = downloader.download("surface")
11
+ #page = downloader.download("prevent")
12
12
  #parser = PageParser.new
13
13
  #parsed = parser.parse(page)
14
14
  #formatter = Formatter.new(items: 15)
metadata CHANGED
@@ -5,7 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 2
8
- version: "0.2"
8
+ - 1
9
+ version: 0.2.1
9
10
  platform: ruby
10
11
  authors:
11
12
  - Victor Savkin
@@ -13,7 +14,7 @@ autorequire:
13
14
  bindir: bin
14
15
  cert_chain: []
15
16
 
16
- date: 2010-05-10 00:00:00 +11:00
17
+ date: 2010-05-11 00:00:00 +11:00
17
18
  default_executable:
18
19
  dependencies:
19
20
  - !ruby/object:Gem::Dependency