oald_parser 0.2 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -25,8 +25,10 @@ module OaldParser
25
25
  class PageParser
26
26
  def parse(page)
27
27
  parsed = Nokogiri::HTML(page)
28
- if blocks_on_page?(parsed)
28
+ if blocks_on_page? parsed
29
29
  Page.new(parse_block(parsed), [])
30
+ elsif def_on_page? parsed
31
+ Page.new([], parse_def(parsed))
30
32
  else
31
33
  Page.new([], parse_items(parsed))
32
34
  end
@@ -37,6 +39,10 @@ module OaldParser
37
39
  page.css('div.sd-g').first
38
40
  end
39
41
 
42
+ def def_on_page?(page)
43
+ page.css('div.h-g').first
44
+ end
45
+
40
46
  def parse_block(page)
41
47
  block_nodes = page.css('div.sd-g')
42
48
  block_nodes.collect do |block|
@@ -50,15 +56,26 @@ module OaldParser
50
56
  item_nodes = block.css('span.n-g')
51
57
  item_nodes.collect do |item|
52
58
  item_text = all_except(item, 'x-g')
53
- example_nodes = item.css('span.x-g')
54
- examples = example_nodes.collect{|e| e.text.strip}
55
- Item.new(item_text, examples)
59
+ Item.new(item_text, parse_examples(item))
56
60
  end
57
61
  end
58
62
 
63
+ def parse_def(page)
64
+ item_nodes = page.css('div.h-g')
65
+ item_nodes.collect do |item|
66
+ item_text = item.css('div.def_block').first.text.strip
67
+ Item.new(item_text, parse_examples(item))
68
+ end
69
+ end
70
+
71
+ def parse_examples(item)
72
+ example_nodes = item.css('span.x-g')
73
+ example_nodes.collect{|e| e.text.strip}
74
+ end
75
+
59
76
  def all_except(item, class_name)
60
77
  elements = item.children.find_all do |c|
61
- !(c.name == 'span' && c[:class] == class_name)
78
+ !(['span', 'div'].include?(c.name) && c[:class] == class_name)
62
79
  end
63
80
  elements.collect{|e|e.text}.join('').strip
64
81
  end
data/lib/oald_parser.rb CHANGED
@@ -8,7 +8,7 @@ require_relative 'oald_parser/page_parser'
8
8
  #include OaldParser
9
9
  #
10
10
  #downloader = PageDownloader.new("http://www.oxfordadvancedlearnersdictionary.com/dictionary")
11
- #page = downloader.download("surface")
11
+ #page = downloader.download("prevent")
12
12
  #parser = PageParser.new
13
13
  #parsed = parser.parse(page)
14
14
  #formatter = Formatter.new(items: 15)
metadata CHANGED
@@ -5,7 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 2
8
- version: "0.2"
8
+ - 1
9
+ version: 0.2.1
9
10
  platform: ruby
10
11
  authors:
11
12
  - Victor Savkin
@@ -13,7 +14,7 @@ autorequire:
13
14
  bindir: bin
14
15
  cert_chain: []
15
16
 
16
- date: 2010-05-10 00:00:00 +11:00
17
+ date: 2010-05-11 00:00:00 +11:00
17
18
  default_executable:
18
19
  dependencies:
19
20
  - !ruby/object:Gem::Dependency