oald_parser 0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,37 @@
1
+ require_relative 'oald_parser_exception'
2
+ require_relative 'page_parser'
3
+ require_relative 'formatter'
4
+ require_relative 'page_downloader'
5
+
6
module OaldParser
  # Single entry point of the library: runs a word through the
  # download -> parse -> format pipeline and converts a missing (nil/false)
  # result from any stage into an OaldParserException carrying a stage code.
  class Facade
    # @param downloader [#download] object whose +download(word)+ returns the
    #   raw page, or nil on failure
    # @param parser [#parse] object whose +parse(page)+ returns the extracted
    #   fragment, or nil on failure
    # @param formatter [#format] object whose +format(parsed)+ returns the
    #   final text, or nil on failure
    def initialize(downloader, parser, formatter)
      @downloader = downloader
      @parser = parser
      @formatter = formatter
    end

    # Builds a facade wired with the default collaborators: a PageDownloader
    # for the OUP lookup script, a PageParser and a Formatter limited to 15
    # lines.
    #
    # @return [Facade]
    def self.create_facade
      downloader = PageDownloader.new('http://www.oup.com/oald-bin/web_getald7index1a.pl')
      parser = PageParser.new
      formatter = Formatter.new(lines: 15)
      # `new` instead of `Facade.new` so subclasses get an instance of themselves.
      new(downloader, parser, formatter)
    end

    # Looks up a word and returns its formatted description.
    #
    # @param args [Hash] options; the word is read from the +:word+ key
    # @return [Object] whatever the formatter produced
    # @raise [OaldParserException] with code INTERNAL when no usable word was
    #   given, NET when the download yields nothing, PARSER when parsing yields
    #   nothing, FORMATTER when formatting yields nothing
    def describe(args)
      # Tolerate a nil argument: report it as a missing word rather than
      # letting NoMethodError escape.
      word = args && args[:word]
      # An empty or whitespace-only word is treated the same as no word at all.
      raise OaldParserException.new(OaldParserException::INTERNAL) if !word || word.to_s.strip.empty?

      page = @downloader.download(word)
      raise OaldParserException.new(OaldParserException::NET) unless page

      parsed = @parser.parse(page)
      raise OaldParserException.new(OaldParserException::PARSER) unless parsed

      formatted = @formatter.format(parsed)
      raise OaldParserException.new(OaldParserException::FORMATTER) unless formatted

      formatted
    end
  end
end
@@ -0,0 +1,23 @@
1
+ require 'nokogiri'
2
+
3
+ module OaldParser
4
+ class Formatter
5
+ def initialize(options)
6
+ @options = options
7
+ end
8
+
9
+ def format(content)
10
+ lined_content = content.gsub(/<\s*br\s*\/*>/, '\n')
11
+ text = Nokogiri::HTML(lined_content).text
12
+ first_lines = first_lines(text, @options[:lines])
13
+ first_lines.strip
14
+ rescue
15
+ nil
16
+ end
17
+
18
+ private
19
+ def first_lines(text, lines)
20
+ text.split('\n').first(lines).join('\n')
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,19 @@
1
module OaldParser
  # Error raised by the library; +code+ identifies the stage that failed.
  #
  # Inherits from StandardError (not Exception) so that a plain +rescue+ in
  # client code catches it and it is not lumped together with interpreter-level
  # exceptions such as SystemExit or Interrupt.
  class OaldParserException < StandardError
    NET = :net
    PARSER = :parser
    FORMATTER = :formatter
    INTERNAL = :internal

    # @return [Symbol] one of NET, PARSER, FORMATTER, INTERNAL (as passed in)
    attr_reader :code

    # @param code [Symbol] failure code
    # @param message [String, nil] optional message; defaults to
    #   "<code> problems" when omitted
    def initialize(code, message = nil)
      @code = code
      super(message || "#{@code} problems")
    end
  end
end
@@ -0,0 +1,17 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+
4
module OaldParser
  # Fetches the dictionary page for a word by POSTing a form to a fixed URL.
  class PageDownloader
    # @param url [String] address of the lookup script
    def initialize(url)
      @url = url
    end

    # POSTs +word+ as the +search_word+ form field and returns the page text.
    #
    # Net::HTTP.post_form returns a Net::HTTPResponse, not a String, so the
    # body is extracted here; the parser downstream calls String methods on
    # the result.
    #
    # @param word [String] the word to look up
    # @return [String, nil] response body for a 2xx response; nil for any
    #   other status or when the request raises a StandardError (bad URL,
    #   connection failure, ...)
    def download(word)
      response = Net::HTTP.post_form(URI.parse(@url), search_word: word)
      response.body if response.is_a?(Net::HTTPSuccess)
    rescue StandardError
      nil
    end
  end
end
@@ -0,0 +1,22 @@
1
module OaldParser
  # Cuts the interesting fragment out of a downloaded page: everything between
  # the end of the search form and the start of the "oald" footer div.
  class PageParser
    # End of the page header: closing </select> followed by closing </form>.
    HEADER_END = /<\/select>\s*<\/form>/i.freeze
    # Start of the page footer.
    FOOTER_START = /<div\s+class='oald'>/i.freeze

    # @param page [String] full page text
    # @return [String, nil] the stripped fragment between header and footer,
    #   or nil unless each marker occurs exactly once in the text it is
    #   searched in
    def parse(page)
      body = extract_part_without_header(page)
      return nil unless body

      body = extract_part_without_footer(body)
      return nil unless body

      body.strip
    end

    private

    # Returns the text after the header marker, or nil unless the marker
    # splits the page into exactly two parts.
    def extract_part_without_header(page)
      # Limit -1 keeps trailing empty fields, so a marker at the very end of
      # the input still yields two parts instead of one.
      parts = page.split(HEADER_END, -1)
      parts.size == 2 ? parts[1] : nil
    end

    # Returns the text before the footer marker, or nil unless the marker
    # splits the text into exactly two parts.
    def extract_part_without_footer(page)
      # See extract_part_without_header for why the limit is -1.
      parts = page.split(FOOTER_START, -1)
      parts.size == 2 ? parts[0] : nil
    end
  end
end
metadata ADDED
@@ -0,0 +1,79 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: oald_parser
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ version: "0.1"
9
+ platform: ruby
10
+ authors:
11
+ - Victor Savkin
12
+ autorequire:
13
+ bindir: bin
14
+ cert_chain: []
15
+
16
+ date: 2010-04-13 00:00:00 +11:00
17
+ default_executable:
18
+ dependencies:
19
+ - !ruby/object:Gem::Dependency
20
+ name: rspec
21
+ prerelease: false
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ segments:
27
+ - 0
28
+ version: "0"
29
+ type: :development
30
+ version_requirements: *id001
31
+ description: Simple parser for the online Oxford dictionary
32
+ email:
33
+ - avix1000@gmail.com
34
+ executables: []
35
+
36
+ extensions: []
37
+
38
+ extra_rdoc_files: []
39
+
40
+ files:
41
+ - lib/oald_parser/page_downloader.rb
42
+ - lib/oald_parser/formatter.rb
43
+ - lib/oald_parser/oald_parser_exception.rb
44
+ - lib/oald_parser/page_parser.rb
45
+ - lib/oald_parser/facade.rb
46
+ has_rdoc: true
47
+ homepage:
48
+ licenses: []
49
+
50
+ post_install_message:
51
+ rdoc_options: []
52
+
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ segments:
60
+ - 0
61
+ version: "0"
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ segments:
67
+ - 1
68
+ - 3
69
+ - 6
70
+ version: 1.3.6
71
+ requirements: []
72
+
73
+ rubyforge_project:
74
+ rubygems_version: 1.3.6
75
+ signing_key:
76
+ specification_version: 3
77
+ summary: Simple parser for the online Oxford dictionary
78
+ test_files: []
79
+