oald_parser 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,24 +2,27 @@ require_relative 'oald_parser_exception'
2
2
  require_relative 'page_parser'
3
3
  require_relative 'formatter'
4
4
  require_relative 'page_downloader'
5
+ require_relative 'word_extractor'
5
6
 
6
7
  module OaldParser
7
8
  class Facade
8
- def initialize(downloader, parser, formatter)
9
+ def initialize(downloader, parser, formatter, extractor)
9
10
  @downloader = downloader
10
11
  @parser = parser
11
12
  @formatter = formatter
13
+ @extractor = extractor
12
14
  end
13
15
 
14
16
  def self.create_facade
15
17
  downloader = PageDownloader.new('http://www.oup.com/oald-bin/web_getald7index1a.pl')
16
18
  parser = PageParser.new
17
19
  formatter = Formatter.new(lines: 15)
18
- Facade.new(downloader, parser, formatter)
20
+ extractor = WordExtractor.new
21
+ Facade.new(downloader, parser, formatter, extractor)
19
22
  end
20
23
 
21
24
  def describe(args)
22
- word = args[:word]
25
+ word = get_word(args)
23
26
  raise OaldParserException.new(OaldParserException::INTERNAL) unless word
24
27
 
25
28
  page = @downloader.download(word)
@@ -33,5 +36,14 @@ module OaldParser
33
36
 
34
37
  formatted
35
38
  end
39
+
40
+ private
41
+ def get_word(args)
42
+ if args[:word]
43
+ args[:word]
44
+ elsif args[:str]
45
+ @extractor.extract(args[:str])
46
+ end
47
+ end
36
48
  end
37
49
  end
@@ -0,0 +1,19 @@
1
+ module OaldParser
2
+ class WordExtractor
3
+ def extract(str)
4
+ res = remove_unused_words(str)
5
+ find_first_big_word(res)
6
+ end
7
+
8
+ private
9
+ def remove_unused_words(str)
10
+ str.gsub(' ', ' ').
11
+ gsub(/\Aa | a |\Aan | an |\Athe | the |\[.*\]|\(.*\)| adj | adj\z| adv | adv\z/i, '')
12
+ end
13
+
14
+ def find_first_big_word(str)
15
+ parts = str.split(' ')
16
+ parts.size > 1 ? parts.find{|w| w.size > 2} : str.strip
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,5 @@
1
+ require_relative 'oald_parser/facade'
2
+ require_relative 'oald_parser/formatter'
3
+ require_relative 'oald_parser/oald_parser_exception'
4
+ require_relative 'oald_parser/page_downloader'
5
+ require_relative 'oald_parser/page_parser'
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 1
9
- version: 0.1.1
8
+ - 2
9
+ version: 0.1.2
10
10
  platform: ruby
11
11
  authors:
12
12
  - Victor Savkin
@@ -56,6 +56,8 @@ files:
56
56
  - lib/oald_parser/oald_parser_exception.rb
57
57
  - lib/oald_parser/page_parser.rb
58
58
  - lib/oald_parser/facade.rb
59
+ - lib/oald_parser/word_extractor.rb
60
+ - lib/oald_parser.rb
59
61
  has_rdoc: true
60
62
  homepage:
61
63
  licenses: []