oald_parser 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -2,8 +2,7 @@
2
2
 
3
3
  == Description
4
4
 
5
- OaldParser is a simple gem for getting descriptions for words.
6
- It uses online Oxford Advanced Learner's Dictionary for this purpose.
5
+ OALD Parser (Oxford Advanced Learner's Dictionary Parser) is a simple gem providing access to one of the best online dictionaries for people studying English.
7
6
 
8
7
  == Installation
9
8
 
@@ -11,30 +10,27 @@ It uses online Oxford Advanced Learner's Dictionary for this purpose.
11
10
 
12
11
  == Usage
13
12
 
14
- The whole gem is hidden by a facade class that should be used
15
- for getting all the information.
16
- The first way to create this facade is:
13
+ To hide the complexity of parsing and searching words I provide a simple facade:
17
14
 
18
- facade = OaldParser::Facade.create_facade
15
+ facade = OaldParser::Facade.create_configured_instance
19
16
 
20
- After that it can be used in such a way:
17
+ It can be used this way:
21
18
 
22
19
  text = facade.describe(word: 'dog') OR
23
20
  text = facade.describe(str: 'a dog [CN]')
24
21
 
25
- The 'describe' method returns a plain text describing word or a string.
22
+ The 'describe' method returns a plain text describing a word or a string.
26
23
 
27
24
  == Customizing
28
25
 
29
- If you want to customize the facade you should look at the 'create_facade' method:
26
+ You can customize the behaviour of the facade by providing your own implementation of one of the objects the facade uses.
30
27
 
31
- def self.create_facade
32
- downloader = PageDownloader.new('http://www.oup.com/oald-bin/web_getald7index1a.pl')
28
+ def self.create_configured_instance
29
+ downloader = PageDownloader.new(URL)
33
30
  parser = PageParser.new
34
- formatter = Formatter.new(lines: 15)
31
+ formatter = Formatter.new
35
32
  extractor = WordExtractor.new
36
33
  Facade.new(downloader, parser, formatter, extractor)
37
34
  end
38
35
 
39
- As you can see it is not a problem to use your own parser or formatter and create
40
- a special instance of facade.
36
+ Please take a look at the spec file for more details.
data/Rakefile CHANGED
@@ -1,6 +1 @@
1
- require 'spec/rake/spectask'
2
-
3
- desc "Run all specs"
4
- Spec::Rake::SpecTask.new('spec') do |t|
5
- t.spec_files = FileList['spec/**/*.rb']
6
- end
1
+ require 'bundler/gem_tasks'
@@ -6,6 +6,8 @@ require_relative 'word_extractor'
6
6
 
7
7
  module OaldParser
8
8
  class Facade
9
+ URL = 'http://www.oxfordadvancedlearnersdictionary.com/dictionary'
10
+
9
11
  def initialize(downloader, parser, formatter, extractor)
10
12
  @downloader = downloader
11
13
  @parser = parser
@@ -13,8 +15,8 @@ module OaldParser
13
15
  @extractor = extractor
14
16
  end
15
17
 
16
- def self.create_facade
17
- downloader = PageDownloader.new('http://www.oxfordadvancedlearnersdictionary.com/dictionary')
18
+ def self.create_configured_instance
19
+ downloader = PageDownloader.new(URL)
18
20
  parser = PageParser.new
19
21
  formatter = Formatter.new
20
22
  extractor = WordExtractor.new
@@ -23,6 +25,7 @@ module OaldParser
23
25
 
24
26
  def describe(args)
25
27
  word = get_word(args)
28
+
26
29
  raise OaldParserException.new(OaldParserException::INTERNAL) unless word
27
30
 
28
31
  page = @downloader.download(word)
@@ -46,4 +49,4 @@ module OaldParser
46
49
  end
47
50
  end
48
51
  end
49
- end
52
+ end
File without changes
@@ -16,4 +16,4 @@ module OaldParser
16
16
  end
17
17
  end
18
18
  end
19
- end
19
+ end
@@ -14,4 +14,4 @@ module OaldParser
14
14
  nil
15
15
  end
16
16
  end
17
- end
17
+ end
@@ -83,4 +83,4 @@ module OaldParser
83
83
  elements.collect{|e|e.text}.join('').strip
84
84
  end
85
85
  end
86
- end
86
+ end
@@ -1,19 +1,25 @@
1
1
  module OaldParser
2
2
  class WordExtractor
3
+ NOISE = /\Aa | a |\Aan | an |\Athe | the |\[.*\]|\(.*\)| adj | adj\z| adv | adv\z/i
4
+
3
5
  def extract(str)
4
- res = remove_unused_words(str)
5
- find_first_big_word(res)
6
+ res = remove_noise(str)
7
+ retrieve_first_long_word(res)
6
8
  end
7
9
 
8
10
  private
9
- def remove_unused_words(str)
10
- str.gsub(' ', ' ').
11
- gsub(/\Aa | a |\Aan | an |\Athe | the |\[.*\]|\(.*\)| adj | adj\z| adv | adv\z/i, '')
11
+ def remove_noise(str)
12
+ str.gsub(' ', ' ').gsub(NOISE, '')
13
+ end
14
+
15
+ def retrieve_first_long_word(str)
16
+ words = str.split(' ')
17
+ word = words.size > 1 ? find_first_long_word(words) : str
18
+ word.strip
12
19
  end
13
20
 
14
- def find_first_big_word(str)
15
- parts = str.split(' ')
16
- parts.size > 1 ? parts.find{|w| w.size > 2} : str.strip
21
+ def find_first_long_word(words)
22
+ words.find{|w| w.size > 2}
17
23
  end
18
24
  end
19
- end
25
+ end
data/lib/oald_parser.rb CHANGED
@@ -3,22 +3,3 @@ require_relative 'oald_parser/formatter'
3
3
  require_relative 'oald_parser/oald_parser_exception'
4
4
  require_relative 'oald_parser/page_downloader'
5
5
  require_relative 'oald_parser/page_parser'
6
-
7
-
8
- #include OaldParser
9
- #
10
- #downloader = PageDownloader.new("http://www.oxfordadvancedlearnersdictionary.com/dictionary")
11
- #page = downloader.download("a")
12
- ##puts page
13
- #
14
- #parser = PageParser.new
15
- #parsed = parser.parse(page)
16
- #puts parsed.inspect
17
- #
18
- #formatter = Formatter.new(items: 15)
19
- #puts formatter.format(parsed)
20
-
21
- #class=sd-g block
22
- #class=n-g new line
23
- #class=x-g new list item
24
- #class=xr-g delete
metadata CHANGED
@@ -1,61 +1,62 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: oald_parser
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 2
8
- - 2
9
- version: 0.2.2
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.3
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Victor Savkin
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2010-05-11 00:00:00 +11:00
12
+ date: 2011-07-01 00:00:00.000000000 -04:00
18
13
  default_executable:
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
21
16
  name: rspec
17
+ requirement: &2165618180 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
22
+ version: '0'
23
+ type: :development
22
24
  prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- requirements:
25
- - - ">="
26
- - !ruby/object:Gem::Version
27
- segments:
28
- - 0
29
- version: "0"
25
+ version_requirements: *2165618180
26
+ - !ruby/object:Gem::Dependency
27
+ name: watchr
28
+ requirement: &2165617620 !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
30
34
  type: :development
31
- version_requirements: *id001
32
- - !ruby/object:Gem::Dependency
33
- name: nokogiri
34
35
  prerelease: false
35
- requirement: &id002 !ruby/object:Gem::Requirement
36
- requirements:
37
- - - ">="
38
- - !ruby/object:Gem::Version
39
- segments:
40
- - 0
41
- version: "0"
36
+ version_requirements: *2165617620
37
+ - !ruby/object:Gem::Dependency
38
+ name: nokogiri
39
+ requirement: &2165617080 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
42
45
  type: :runtime
43
- version_requirements: *id002
44
- description: Simple parse for online oxford dictionary
45
- email:
46
+ prerelease: false
47
+ version_requirements: *2165617080
48
+ description: Oxford Advanced Learner's Dictionary Parser
49
+ email:
46
50
  - avix1000@gmail.com
47
51
  executables: []
48
-
49
52
  extensions: []
50
-
51
53
  extra_rdoc_files: []
52
-
53
- files:
54
- - lib/oald_parser/page_downloader.rb
54
+ files:
55
+ - lib/oald_parser/facade.rb
55
56
  - lib/oald_parser/formatter.rb
56
57
  - lib/oald_parser/oald_parser_exception.rb
58
+ - lib/oald_parser/page_downloader.rb
57
59
  - lib/oald_parser/page_parser.rb
58
- - lib/oald_parser/facade.rb
59
60
  - lib/oald_parser/word_extractor.rb
60
61
  - lib/oald_parser.rb
61
62
  - README.rdoc
@@ -63,34 +64,26 @@ files:
63
64
  has_rdoc: true
64
65
  homepage:
65
66
  licenses: []
66
-
67
67
  post_install_message:
68
68
  rdoc_options: []
69
-
70
- require_paths:
69
+ require_paths:
71
70
  - lib
72
- required_ruby_version: !ruby/object:Gem::Requirement
73
- requirements:
74
- - - ">="
75
- - !ruby/object:Gem::Version
76
- segments:
77
- - 0
78
- version: "0"
79
- required_rubygems_version: !ruby/object:Gem::Requirement
80
- requirements:
81
- - - ">="
82
- - !ruby/object:Gem::Version
83
- segments:
84
- - 1
85
- - 3
86
- - 6
71
+ required_ruby_version: !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ none: false
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
87
82
  version: 1.3.6
88
83
  requirements: []
89
-
90
84
  rubyforge_project:
91
- rubygems_version: 1.3.6
85
+ rubygems_version: 1.6.2
92
86
  signing_key:
93
87
  specification_version: 3
94
- summary: Simple parse for online oxford dictionary
88
+ summary: Oxford Advanced Learner's Dictionary Parser
95
89
  test_files: []
96
-