oald_parser 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +10 -14
- data/Rakefile +1 -6
- data/lib/oald_parser/facade.rb +6 -3
- data/lib/oald_parser/formatter.rb +0 -0
- data/lib/oald_parser/oald_parser_exception.rb +1 -1
- data/lib/oald_parser/page_downloader.rb +1 -1
- data/lib/oald_parser/page_parser.rb +1 -1
- data/lib/oald_parser/word_extractor.rb +15 -9
- data/lib/oald_parser.rb +0 -19
- metadata +54 -61
data/README.rdoc
CHANGED
@@ -2,8 +2,7 @@
|
|
2
2
|
|
3
3
|
== Description
|
4
4
|
|
5
|
-
|
6
|
-
It uses online Oxford Advanced Learner's Dictionary for this purpose.
|
5
|
+
OALD Parser (Oxford Advanced Learner's Dictionary Parser) is a simple gem providing access to one of the best online dictionaries for people studying English.
|
7
6
|
|
8
7
|
== Installation
|
9
8
|
|
@@ -11,30 +10,27 @@ It uses online Oxford Advanced Learner's Dictionary for this purpose.
|
|
11
10
|
|
12
11
|
== Usage
|
13
12
|
|
14
|
-
|
15
|
-
for getting all the information.
|
16
|
-
The first way to create this facade is:
|
13
|
+
To hide the complexity of parsing and searching words I provide a simple facade:
|
17
14
|
|
18
|
-
facade = OaldParser::Facade.
|
15
|
+
facade = OaldParser::Facade.create_configured_instance
|
19
16
|
|
20
|
-
|
17
|
+
It can be used this way:
|
21
18
|
|
22
19
|
text = facade.describe(word: 'dog') OR
|
23
20
|
text = facade.describe(str: 'a dog [CN]')
|
24
21
|
|
25
|
-
The 'describe' method returns a plain text describing word or a string.
|
22
|
+
The 'describe' method returns a plain text describing a word or a string.
|
26
23
|
|
27
24
|
== Customizing
|
28
25
|
|
29
|
-
|
26
|
+
You can customize the behaviour of the facade by providing your own implementation of one of the objects the facade uses.
|
30
27
|
|
31
|
-
def self.
|
32
|
-
downloader = PageDownloader.new(
|
28
|
+
def self.create_configured_instance
|
29
|
+
downloader = PageDownloader.new(URL)
|
33
30
|
parser = PageParser.new
|
34
|
-
formatter = Formatter.new
|
31
|
+
formatter = Formatter.new
|
35
32
|
extractor = WordExtractor.new
|
36
33
|
Facade.new(downloader, parser, formatter, extractor)
|
37
34
|
end
|
38
35
|
|
39
|
-
|
40
|
-
a special instance of facade.
|
36
|
+
Please, take a look at a spec file for more details.
|
data/Rakefile
CHANGED
data/lib/oald_parser/facade.rb
CHANGED
@@ -6,6 +6,8 @@ require_relative 'word_extractor'
|
|
6
6
|
|
7
7
|
module OaldParser
|
8
8
|
class Facade
|
9
|
+
URL = 'http://www.oxfordadvancedlearnersdictionary.com/dictionary'
|
10
|
+
|
9
11
|
def initialize(downloader, parser, formatter, extractor)
|
10
12
|
@downloader = downloader
|
11
13
|
@parser = parser
|
@@ -13,8 +15,8 @@ module OaldParser
|
|
13
15
|
@extractor = extractor
|
14
16
|
end
|
15
17
|
|
16
|
-
def self.
|
17
|
-
downloader = PageDownloader.new(
|
18
|
+
def self.create_configured_instance
|
19
|
+
downloader = PageDownloader.new(URL)
|
18
20
|
parser = PageParser.new
|
19
21
|
formatter = Formatter.new
|
20
22
|
extractor = WordExtractor.new
|
@@ -23,6 +25,7 @@ module OaldParser
|
|
23
25
|
|
24
26
|
def describe(args)
|
25
27
|
word = get_word(args)
|
28
|
+
|
26
29
|
raise OaldParserException.new(OaldParserException::INTERNAL) unless word
|
27
30
|
|
28
31
|
page = @downloader.download(word)
|
@@ -46,4 +49,4 @@ module OaldParser
|
|
46
49
|
end
|
47
50
|
end
|
48
51
|
end
|
49
|
-
end
|
52
|
+
end
|
File without changes
|
@@ -1,19 +1,25 @@
|
|
1
1
|
module OaldParser
|
2
2
|
class WordExtractor
|
3
|
+
NOISE = /\Aa | a |\Aan | an |\Athe | the |\[.*\]|\(.*\)| adj | adj\z| adv | adv\z/i
|
4
|
+
|
3
5
|
def extract(str)
|
4
|
-
res =
|
5
|
-
|
6
|
+
res = remove_noise(str)
|
7
|
+
retrieve_first_long_word(res)
|
6
8
|
end
|
7
9
|
|
8
10
|
private
|
9
|
-
def
|
10
|
-
str.gsub(' ', ' ').
|
11
|
-
|
11
|
+
def remove_noise(str)
|
12
|
+
str.gsub(' ', ' ').gsub(NOISE, '')
|
13
|
+
end
|
14
|
+
|
15
|
+
def retrieve_first_long_word(str)
|
16
|
+
words = str.split(' ')
|
17
|
+
word = words.size > 1 ? find_first_long_word(words) : str
|
18
|
+
word.strip
|
12
19
|
end
|
13
20
|
|
14
|
-
def
|
15
|
-
|
16
|
-
parts.size > 1 ? parts.find{|w| w.size > 2} : str.strip
|
21
|
+
def find_first_long_word(words)
|
22
|
+
words.find{|w| w.size > 2}
|
17
23
|
end
|
18
24
|
end
|
19
|
-
end
|
25
|
+
end
|
data/lib/oald_parser.rb
CHANGED
@@ -3,22 +3,3 @@ require_relative 'oald_parser/formatter'
|
|
3
3
|
require_relative 'oald_parser/oald_parser_exception'
|
4
4
|
require_relative 'oald_parser/page_downloader'
|
5
5
|
require_relative 'oald_parser/page_parser'
|
6
|
-
|
7
|
-
|
8
|
-
#include OaldParser
|
9
|
-
#
|
10
|
-
#downloader = PageDownloader.new("http://www.oxfordadvancedlearnersdictionary.com/dictionary")
|
11
|
-
#page = downloader.download("a")
|
12
|
-
##puts page
|
13
|
-
#
|
14
|
-
#parser = PageParser.new
|
15
|
-
#parsed = parser.parse(page)
|
16
|
-
#puts parsed.inspect
|
17
|
-
#
|
18
|
-
#formatter = Formatter.new(items: 15)
|
19
|
-
#puts formatter.format(parsed)
|
20
|
-
|
21
|
-
#class=sd-g block
|
22
|
-
#class=n-g new line
|
23
|
-
#class=x-g new list item
|
24
|
-
#class=xr-g delete
|
metadata
CHANGED
@@ -1,61 +1,62 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: oald_parser
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 2
|
8
|
-
- 2
|
9
|
-
version: 0.2.2
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.3
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- Victor Savkin
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
date: 2010-05-11 00:00:00 +11:00
|
12
|
+
date: 2011-07-01 00:00:00.000000000 -04:00
|
18
13
|
default_executable:
|
19
|
-
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
21
16
|
name: rspec
|
17
|
+
requirement: &2165618180 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ! '>='
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '0'
|
23
|
+
type: :development
|
22
24
|
prerelease: false
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
25
|
+
version_requirements: *2165618180
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: watchr
|
28
|
+
requirement: &2165617620 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
30
34
|
type: :development
|
31
|
-
version_requirements: *id001
|
32
|
-
- !ruby/object:Gem::Dependency
|
33
|
-
name: nokogiri
|
34
35
|
prerelease: false
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
36
|
+
version_requirements: *2165617620
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: nokogiri
|
39
|
+
requirement: &2165617080 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ! '>='
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
42
45
|
type: :runtime
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
+
prerelease: false
|
47
|
+
version_requirements: *2165617080
|
48
|
+
description: Oxford Advanced Learner's Dictionary Parser
|
49
|
+
email:
|
46
50
|
- avix1000@gmail.com
|
47
51
|
executables: []
|
48
|
-
|
49
52
|
extensions: []
|
50
|
-
|
51
53
|
extra_rdoc_files: []
|
52
|
-
|
53
|
-
|
54
|
-
- lib/oald_parser/page_downloader.rb
|
54
|
+
files:
|
55
|
+
- lib/oald_parser/facade.rb
|
55
56
|
- lib/oald_parser/formatter.rb
|
56
57
|
- lib/oald_parser/oald_parser_exception.rb
|
58
|
+
- lib/oald_parser/page_downloader.rb
|
57
59
|
- lib/oald_parser/page_parser.rb
|
58
|
-
- lib/oald_parser/facade.rb
|
59
60
|
- lib/oald_parser/word_extractor.rb
|
60
61
|
- lib/oald_parser.rb
|
61
62
|
- README.rdoc
|
@@ -63,34 +64,26 @@ files:
|
|
63
64
|
has_rdoc: true
|
64
65
|
homepage:
|
65
66
|
licenses: []
|
66
|
-
|
67
67
|
post_install_message:
|
68
68
|
rdoc_options: []
|
69
|
-
|
70
|
-
require_paths:
|
69
|
+
require_paths:
|
71
70
|
- lib
|
72
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
requirements:
|
81
|
-
- -
|
82
|
-
- !ruby/object:Gem::Version
|
83
|
-
segments:
|
84
|
-
- 1
|
85
|
-
- 3
|
86
|
-
- 6
|
71
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
|
+
none: false
|
79
|
+
requirements:
|
80
|
+
- - ! '>='
|
81
|
+
- !ruby/object:Gem::Version
|
87
82
|
version: 1.3.6
|
88
83
|
requirements: []
|
89
|
-
|
90
84
|
rubyforge_project:
|
91
|
-
rubygems_version: 1.
|
85
|
+
rubygems_version: 1.6.2
|
92
86
|
signing_key:
|
93
87
|
specification_version: 3
|
94
|
-
summary:
|
88
|
+
summary: Oxford Advanced Learner's Dictionary Parser
|
95
89
|
test_files: []
|
96
|
-
|