oald_parser 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,37 @@
1
+ require_relative 'oald_parser_exception'
2
+ require_relative 'page_parser'
3
+ require_relative 'formatter'
4
+ require_relative 'page_downloader'
5
+
6
module OaldParser
  # Single entry point of the library: downloads the dictionary page for a
  # word, cuts the entry out of the page and formats it as plain text.
  class Facade
    # Lookup script used when no :url option is given to .create_facade.
    DEFAULT_URL = 'http://www.oup.com/oald-bin/web_getald7index1a.pl'
    # Line limit handed to the formatter when no :lines option is given.
    DEFAULT_LINES = 15

    # @param downloader [#download] called with the word; a falsy result
    #   is reported as a NET error
    # @param parser [#parse] called with the downloaded page; a falsy result
    #   is reported as a PARSER error
    # @param formatter [#format] called with the parsed page; a falsy result
    #   is reported as a FORMATTER error
    def initialize(downloader, parser, formatter)
      @downloader = downloader
      @parser = parser
      @formatter = formatter
    end

    # Builds a facade wired with the stock collaborators.
    #
    # @param options [Hash] optional overrides
    #   - :url   lookup URL passed to PageDownloader (default DEFAULT_URL)
    #   - :lines line limit passed to Formatter (default DEFAULT_LINES)
    # @return [Facade] (or the subclass this is called on)
    def self.create_facade(options = {})
      downloader = PageDownloader.new(options.fetch(:url, DEFAULT_URL))
      formatter = Formatter.new(lines: options.fetch(:lines, DEFAULT_LINES))
      # `new` instead of `Facade.new` so subclasses get their own instances.
      new(downloader, PageParser.new, formatter)
    end

    # Looks a word up and returns its formatted description.
    #
    # @param args [Hash, nil] must contain the word under the :word key
    # @return [Object] whatever the formatter returned
    # @raise [OaldParserException] with code INTERNAL when no word is given,
    #   NET / PARSER / FORMATTER when the corresponding step returns a falsy
    #   value
    def describe(args)
      # A nil argument is treated like a missing word instead of crashing
      # with NoMethodError.
      word = ensure_result(args && args[:word], OaldParserException::INTERNAL)
      page = ensure_result(@downloader.download(word), OaldParserException::NET)
      parsed = ensure_result(@parser.parse(page), OaldParserException::PARSER)
      ensure_result(@formatter.format(parsed), OaldParserException::FORMATTER)
    end

    private

    # Returns +value+ unchanged, or raises OaldParserException carrying
    # +code+ when the value is nil or false.
    def ensure_result(value, code)
      raise OaldParserException.new(code) unless value

      value
    end
  end
end
@@ -0,0 +1,23 @@
1
+ require 'nokogiri'
2
+
3
module OaldParser
  # Turns an HTML fragment into plain text limited to a number of lines.
  class Formatter
    # Matches <br>, <br/>, <br /> and their upper-case variants.
    LINE_BREAK_TAG = /<\s*br\s*\/*>/i

    # @param options [Hash] settings; :lines is the maximum number of text
    #   lines #format keeps
    def initialize(options)
      @options = options
    end

    # Replaces line-break tags with newlines, strips the remaining markup
    # via Nokogiri and keeps only the first @options[:lines] lines.
    #
    # @param content [String] HTML fragment
    # @return [String, nil] the stripped text, or nil when anything raises
    #   a StandardError (for example when content is nil)
    def format(content)
      # Double quotes are required: '\n' would insert a literal backslash
      # followed by "n" instead of a newline character.
      with_breaks = content.gsub(LINE_BREAK_TAG, "\n")
      text = Nokogiri::HTML(with_breaks).text
      first_lines(text, @options[:lines]).strip
    rescue StandardError
      nil
    end

    private

    # Returns the first +lines+ newline-separated lines of +text+.
    def first_lines(text, lines)
      text.split("\n").first(lines).join("\n")
    end
  end
end
@@ -0,0 +1,19 @@
1
module OaldParser
  # Error raised by the library; #code tells which stage failed.
  #
  # Inherits from StandardError (not Exception) so that a plain `rescue`
  # or `rescue => e` in client code catches it.
  class OaldParserException < StandardError
    NET = :net
    PARSER = :parser
    FORMATTER = :formatter
    INTERNAL = :internal

    # @return [Symbol] the code given to the constructor
    attr_reader :code

    # @param code [Symbol] one of NET, PARSER, FORMATTER, INTERNAL
    # @param message [String, nil] custom message; defaults to
    #   "<code> problems"
    def initialize(code, message = nil)
      @code = code
      super(message || "#{@code} problems")
    end
  end
end
@@ -0,0 +1,17 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+
4
module OaldParser
  # Fetches the dictionary page for a word with an HTTP form POST.
  class PageDownloader
    # @param url [String] address the search form is posted to
    def initialize(url)
      @url = url
    end

    # Posts +word+ as the "search_word" form field to the configured URL.
    #
    # @param word [String] the word to look up
    # @return [String, nil] the response body for a 2xx response; nil for
    #   any other response or when a StandardError is raised (bad URL,
    #   network failure, ...)
    def download(word)
      response = Net::HTTP.post_form(URI.parse(@url), search_word: word)
      # Hand back the page text, not the response object: the parser works
      # on a String.
      response.body if response.is_a?(Net::HTTPSuccess)
    rescue StandardError
      nil
    end
  end
end
@@ -0,0 +1,22 @@
1
module OaldParser
  # Cuts the dictionary entry out of a downloaded page: everything between
  # the closing "</select></form>" header marker and the "<div class='oald'>"
  # footer marker.
  class PageParser
    HEADER_END = /<\/select>\s*<\/form>/i
    FOOTER_START = /<div\s+class='oald'>/i

    # @param page [String] full page markup
    # @return [String, nil] the stripped entry markup; nil when +page+ is
    #   not a String or either marker does not occur exactly once
    def parse(page)
      return nil unless page.is_a?(String)

      body = extract_part_without_header(page)
      return nil unless body

      entry = extract_part_without_footer(body)
      return nil unless entry

      entry.strip
    end

    private

    # Returns what follows the header marker, or nil unless the marker
    # splits the page into exactly two parts.
    def extract_part_without_header(page)
      # Limit -1 keeps trailing empty fields, which split drops by default.
      parts = page.split(HEADER_END, -1)
      parts.size == 2 ? parts.last : nil
    end

    # Returns what precedes the footer marker, or nil unless the marker
    # splits the page into exactly two parts.
    def extract_part_without_footer(page)
      # Limit -1 so a marker sitting at the very end still yields two parts.
      parts = page.split(FOOTER_START, -1)
      parts.size == 2 ? parts.first : nil
    end
  end
end
metadata ADDED
@@ -0,0 +1,79 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: oald_parser
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ version: "0.1"
9
+ platform: ruby
10
+ authors:
11
+ - Victor Savkin
12
+ autorequire:
13
+ bindir: bin
14
+ cert_chain: []
15
+
16
+ date: 2010-04-13 00:00:00 +11:00
17
+ default_executable:
18
+ dependencies:
19
+ - !ruby/object:Gem::Dependency
20
+ name: rspec
21
+ prerelease: false
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ segments:
27
+ - 0
28
+ version: "0"
29
+ type: :development
30
+ version_requirements: *id001
31
+ description: Simple parser for the online Oxford dictionary
32
+ email:
33
+ - avix1000@gmail.com
34
+ executables: []
35
+
36
+ extensions: []
37
+
38
+ extra_rdoc_files: []
39
+
40
+ files:
41
+ - lib/oald_parser/page_downloader.rb
42
+ - lib/oald_parser/formatter.rb
43
+ - lib/oald_parser/oald_parser_exception.rb
44
+ - lib/oald_parser/page_parser.rb
45
+ - lib/oald_parser/facade.rb
46
+ has_rdoc: true
47
+ homepage:
48
+ licenses: []
49
+
50
+ post_install_message:
51
+ rdoc_options: []
52
+
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ segments:
60
+ - 0
61
+ version: "0"
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ segments:
67
+ - 1
68
+ - 3
69
+ - 6
70
+ version: 1.3.6
71
+ requirements: []
72
+
73
+ rubyforge_project:
74
+ rubygems_version: 1.3.6
75
+ signing_key:
76
+ specification_version: 3
77
+ summary: Simple parser for the online Oxford dictionary
78
+ test_files: []
79
+