oald_parser 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -2,8 +2,7 @@
2
2
 
3
3
  == Description
4
4
 
5
- OaldParser is a simple gem for getting descriptions for words.
6
- It uses online Oxford Advanced Learner's Dictionary for this purpose.
5
+ OALD Parser (Oxford Advanced Learner's Dictionary Parser) is a simple gem providing access to one of the best online dictionaries for people studying English.
7
6
 
8
7
  == Installation
9
8
 
@@ -11,30 +10,27 @@ It uses online Oxford Advanced Learner's Dictionary for this purpose.
11
10
 
12
11
  == Usage
13
12
 
14
- The whole gem is hidden by a facade class that should be used
15
- for getting all the information.
16
- The first way to create this facade is:
13
+ To hide the complexity of parsing and searching for words, I provide a simple facade:
17
14
 
18
- facade = OaldParser::Facade.create_facade
15
+ facade = OaldParser::Facade.create_configured_instance
19
16
 
20
- After that it can be used in such a way:
17
+ It can be used this way:
21
18
 
22
19
  text = facade.describe(word: 'dog') OR
23
20
  text = facade.describe(str: 'a dog [CN]')
24
21
 
25
- The 'describe' method returns a plain text describing word or a string.
22
+ The 'describe' method returns plain text describing a word or a string.
26
23
 
27
24
  == Customizing
28
25
 
29
- If you want to customize the facade you should look at the 'create_facade' method:
26
+ You can customize the behaviour of the facade by providing your own implementation of one of the objects the facade uses.
30
27
 
31
- def self.create_facade
32
- downloader = PageDownloader.new('http://www.oup.com/oald-bin/web_getald7index1a.pl')
28
+ def self.create_configured_instance
29
+ downloader = PageDownloader.new(URL)
33
30
  parser = PageParser.new
34
- formatter = Formatter.new(lines: 15)
31
+ formatter = Formatter.new
35
32
  extractor = WordExtractor.new
36
33
  Facade.new(downloader, parser, formatter, extractor)
37
34
  end
38
35
 
39
- As you can see it is not a problem to use your own parser or formatter and create
40
- a special instance of facade.
36
+ Please, take a look at a spec file for more details.
data/Rakefile CHANGED
@@ -1,6 +1 @@
1
- require 'spec/rake/spectask'
2
-
3
- desc "Run all specs"
4
- Spec::Rake::SpecTask.new('spec') do |t|
5
- t.spec_files = FileList['spec/**/*.rb']
6
- end
1
+ require 'bundler/gem_tasks'
@@ -6,6 +6,8 @@ require_relative 'word_extractor'
6
6
 
7
7
  module OaldParser
8
8
  class Facade
9
+ URL = 'http://www.oxfordadvancedlearnersdictionary.com/dictionary'
10
+
9
11
  def initialize(downloader, parser, formatter, extractor)
10
12
  @downloader = downloader
11
13
  @parser = parser
@@ -13,8 +15,8 @@ module OaldParser
13
15
  @extractor = extractor
14
16
  end
15
17
 
16
- def self.create_facade
17
- downloader = PageDownloader.new('http://www.oxfordadvancedlearnersdictionary.com/dictionary')
18
+ def self.create_configured_instance
19
+ downloader = PageDownloader.new(URL)
18
20
  parser = PageParser.new
19
21
  formatter = Formatter.new
20
22
  extractor = WordExtractor.new
@@ -23,6 +25,7 @@ module OaldParser
23
25
 
24
26
  def describe(args)
25
27
  word = get_word(args)
28
+
26
29
  raise OaldParserException.new(OaldParserException::INTERNAL) unless word
27
30
 
28
31
  page = @downloader.download(word)
@@ -46,4 +49,4 @@ module OaldParser
46
49
  end
47
50
  end
48
51
  end
49
- end
52
+ end
File without changes
@@ -16,4 +16,4 @@ module OaldParser
16
16
  end
17
17
  end
18
18
  end
19
- end
19
+ end
@@ -14,4 +14,4 @@ module OaldParser
14
14
  nil
15
15
  end
16
16
  end
17
- end
17
+ end
@@ -83,4 +83,4 @@ module OaldParser
83
83
  elements.collect{|e|e.text}.join('').strip
84
84
  end
85
85
  end
86
- end
86
+ end
@@ -1,19 +1,25 @@
1
1
  module OaldParser
2
2
  class WordExtractor
3
+ NOISE = /\Aa | a |\Aan | an |\Athe | the |\[.*\]|\(.*\)| adj | adj\z| adv | adv\z/i
4
+
3
5
  def extract(str)
4
- res = remove_unused_words(str)
5
- find_first_big_word(res)
6
+ res = remove_noise(str)
7
+ retrieve_first_long_word(res)
6
8
  end
7
9
 
8
10
  private
9
- def remove_unused_words(str)
10
- str.gsub(' ', ' ').
11
- gsub(/\Aa | a |\Aan | an |\Athe | the |\[.*\]|\(.*\)| adj | adj\z| adv | adv\z/i, '')
11
+ def remove_noise(str)
12
+ str.gsub(' ', ' ').gsub(NOISE, '')
13
+ end
14
+
15
+ def retrieve_first_long_word(str)
16
+ words = str.split(' ')
17
+ word = words.size > 1 ? find_first_long_word(words) : str
18
+ word.strip
12
19
  end
13
20
 
14
- def find_first_big_word(str)
15
- parts = str.split(' ')
16
- parts.size > 1 ? parts.find{|w| w.size > 2} : str.strip
21
+ def find_first_long_word(words)
22
+ words.find{|w| w.size > 2}
17
23
  end
18
24
  end
19
- end
25
+ end
data/lib/oald_parser.rb CHANGED
@@ -3,22 +3,3 @@ require_relative 'oald_parser/formatter'
3
3
  require_relative 'oald_parser/oald_parser_exception'
4
4
  require_relative 'oald_parser/page_downloader'
5
5
  require_relative 'oald_parser/page_parser'
6
-
7
-
8
- #include OaldParser
9
- #
10
- #downloader = PageDownloader.new("http://www.oxfordadvancedlearnersdictionary.com/dictionary")
11
- #page = downloader.download("a")
12
- ##puts page
13
- #
14
- #parser = PageParser.new
15
- #parsed = parser.parse(page)
16
- #puts parsed.inspect
17
- #
18
- #formatter = Formatter.new(items: 15)
19
- #puts formatter.format(parsed)
20
-
21
- #class=sd-g block
22
- #class=n-g new line
23
- #class=x-g new list item
24
- #class=xr-g delete
metadata CHANGED
@@ -1,61 +1,62 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: oald_parser
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 2
8
- - 2
9
- version: 0.2.2
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.3
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Victor Savkin
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2010-05-11 00:00:00 +11:00
12
+ date: 2011-07-01 00:00:00.000000000 -04:00
18
13
  default_executable:
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
21
16
  name: rspec
17
+ requirement: &2165618180 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
22
+ version: '0'
23
+ type: :development
22
24
  prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- requirements:
25
- - - ">="
26
- - !ruby/object:Gem::Version
27
- segments:
28
- - 0
29
- version: "0"
25
+ version_requirements: *2165618180
26
+ - !ruby/object:Gem::Dependency
27
+ name: watchr
28
+ requirement: &2165617620 !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
30
34
  type: :development
31
- version_requirements: *id001
32
- - !ruby/object:Gem::Dependency
33
- name: nokogiri
34
35
  prerelease: false
35
- requirement: &id002 !ruby/object:Gem::Requirement
36
- requirements:
37
- - - ">="
38
- - !ruby/object:Gem::Version
39
- segments:
40
- - 0
41
- version: "0"
36
+ version_requirements: *2165617620
37
+ - !ruby/object:Gem::Dependency
38
+ name: nokogiri
39
+ requirement: &2165617080 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
42
45
  type: :runtime
43
- version_requirements: *id002
44
- description: Simple parse for online oxford dictionary
45
- email:
46
+ prerelease: false
47
+ version_requirements: *2165617080
48
+ description: Oxford Advanced Learner's Dictionary Parser
49
+ email:
46
50
  - avix1000@gmail.com
47
51
  executables: []
48
-
49
52
  extensions: []
50
-
51
53
  extra_rdoc_files: []
52
-
53
- files:
54
- - lib/oald_parser/page_downloader.rb
54
+ files:
55
+ - lib/oald_parser/facade.rb
55
56
  - lib/oald_parser/formatter.rb
56
57
  - lib/oald_parser/oald_parser_exception.rb
58
+ - lib/oald_parser/page_downloader.rb
57
59
  - lib/oald_parser/page_parser.rb
58
- - lib/oald_parser/facade.rb
59
60
  - lib/oald_parser/word_extractor.rb
60
61
  - lib/oald_parser.rb
61
62
  - README.rdoc
@@ -63,34 +64,26 @@ files:
63
64
  has_rdoc: true
64
65
  homepage:
65
66
  licenses: []
66
-
67
67
  post_install_message:
68
68
  rdoc_options: []
69
-
70
- require_paths:
69
+ require_paths:
71
70
  - lib
72
- required_ruby_version: !ruby/object:Gem::Requirement
73
- requirements:
74
- - - ">="
75
- - !ruby/object:Gem::Version
76
- segments:
77
- - 0
78
- version: "0"
79
- required_rubygems_version: !ruby/object:Gem::Requirement
80
- requirements:
81
- - - ">="
82
- - !ruby/object:Gem::Version
83
- segments:
84
- - 1
85
- - 3
86
- - 6
71
+ required_ruby_version: !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ none: false
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
87
82
  version: 1.3.6
88
83
  requirements: []
89
-
90
84
  rubyforge_project:
91
- rubygems_version: 1.3.6
85
+ rubygems_version: 1.6.2
92
86
  signing_key:
93
87
  specification_version: 3
94
- summary: Simple parse for online oxford dictionary
88
+ summary: Oxford Advanced Learner's Dictionary Parser
95
89
  test_files: []
96
-