oald_parser 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/oald_parser/facade.rb +15 -3
- data/lib/oald_parser/word_extractor.rb +19 -0
- data/lib/oald_parser.rb +5 -0
- metadata +4 -2
data/lib/oald_parser/facade.rb
CHANGED
@@ -2,24 +2,27 @@ require_relative 'oald_parser_exception'
|
|
2
2
|
require_relative 'page_parser'
|
3
3
|
require_relative 'formatter'
|
4
4
|
require_relative 'page_downloader'
|
5
|
+
require_relative 'word_extractor'
|
5
6
|
|
6
7
|
module OaldParser
|
7
8
|
class Facade
|
8
|
-
def initialize(downloader, parser, formatter)
|
9
|
+
def initialize(downloader, parser, formatter, extractor)
|
9
10
|
@downloader = downloader
|
10
11
|
@parser = parser
|
11
12
|
@formatter = formatter
|
13
|
+
@extractor = extractor
|
12
14
|
end
|
13
15
|
|
14
16
|
def self.create_facade
|
15
17
|
downloader = PageDownloader.new('http://www.oup.com/oald-bin/web_getald7index1a.pl')
|
16
18
|
parser = PageParser.new
|
17
19
|
formatter = Formatter.new(lines: 15)
|
18
|
-
|
20
|
+
extractor = WordExtractor.new
|
21
|
+
Facade.new(downloader, parser, formatter, extractor)
|
19
22
|
end
|
20
23
|
|
21
24
|
def describe(args)
|
22
|
-
word = args
|
25
|
+
word = get_word(args)
|
23
26
|
raise OaldParserException.new(OaldParserException::INTERNAL) unless word
|
24
27
|
|
25
28
|
page = @downloader.download(word)
|
@@ -33,5 +36,14 @@ module OaldParser
|
|
33
36
|
|
34
37
|
formatted
|
35
38
|
end
|
39
|
+
|
40
|
+
private
|
41
|
+
def get_word(args)
|
42
|
+
if args[:word]
|
43
|
+
args[:word]
|
44
|
+
elsif args[:str]
|
45
|
+
@extractor.extract(args[:str])
|
46
|
+
end
|
47
|
+
end
|
36
48
|
end
|
37
49
|
end
|
data/lib/oald_parser/word_extractor.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
module OaldParser
|
2
|
+
class WordExtractor
|
3
|
+
def extract(str)
|
4
|
+
res = remove_unused_words(str)
|
5
|
+
find_first_big_word(res)
|
6
|
+
end
|
7
|
+
|
8
|
+
private
|
9
|
+
def remove_unused_words(str)
|
10
|
+
str.gsub(' ', ' ').
|
11
|
+
gsub(/\Aa | a |\Aan | an |\Athe | the |\[.*\]|\(.*\)| adj | adj\z| adv | adv\z/i, '')
|
12
|
+
end
|
13
|
+
|
14
|
+
def find_first_big_word(str)
|
15
|
+
parts = str.split(' ')
|
16
|
+
parts.size > 1 ? parts.find{|w| w.size > 2} : str.strip
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/oald_parser.rb
ADDED
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 1
|
8
|
-
- 1
|
9
|
-
version: 0.1.1
|
8
|
+
- 2
|
9
|
+
version: 0.1.2
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Victor Savkin
|
@@ -56,6 +56,8 @@ files:
|
|
56
56
|
- lib/oald_parser/oald_parser_exception.rb
|
57
57
|
- lib/oald_parser/page_parser.rb
|
58
58
|
- lib/oald_parser/facade.rb
|
59
|
+
- lib/oald_parser/word_extractor.rb
|
60
|
+
- lib/oald_parser.rb
|
59
61
|
has_rdoc: true
|
60
62
|
homepage:
|
61
63
|
licenses: []
|