dict 0.3.3 → 0.3.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. data/.gitignore +1 -1
  2. data/.rspec +2 -2
  3. data/.travis.yml +14 -14
  4. data/Gemfile +4 -4
  5. data/Gemfile.lock +40 -40
  6. data/README.md +34 -34
  7. data/Rakefile +8 -8
  8. data/bin/dict +7 -7
  9. data/dict.gemspec +28 -28
  10. data/lib/dict/cli/runner.rb +137 -115
  11. data/lib/dict/dict.rb +40 -53
  12. data/lib/dict/dictionary.rb +45 -62
  13. data/lib/dict/glosbe.rb +68 -66
  14. data/lib/dict/result.rb +38 -38
  15. data/lib/dict/version.rb +3 -3
  16. data/lib/dict/wiktionary.rb +91 -91
  17. data/lib/dict.rb +5 -5
  18. data/spec/dict/lib_dict_cli_runner_spec.rb +120 -120
  19. data/spec/dict/lib_dict_spec.rb +39 -39
  20. data/spec/dict/lib_glosbe_spec.rb +48 -34
  21. data/spec/dict/lib_wiktionary_spec.rb +68 -62
  22. data/spec/dict/spec_helper.rb +17 -17
  23. data/spec/dict/vcr_cassettes/glosbe_translations_asdfff_cassette.yml +1518 -0
  24. data/spec/dict/vcr_cassettes/glosbe_translations_atomic_cassette.yml +2794 -2794
  25. data/spec/dict/vcr_cassettes/glosbe_translations_usage_cassette.yml +2530 -0
  26. data/spec/dict/vcr_cassettes/glosbe_translations_woda_cassette.yml +1949 -1949
  27. data/spec/dict/vcr_cassettes/slowik_runner_cassette.yml +4178 -4178
  28. data/spec/dict/vcr_cassettes/translations_dragon_cassette.yml +8659 -8659
  29. data/spec/dict/vcr_cassettes/translations_slownik_cassette.yml +4177 -4177
  30. data/spec/dict/vcr_cassettes/wiktionary_no_usage_examples.yml +8634 -0
  31. data/spec/dict/vcr_cassettes/wiktionary_translate_result_uppercase.yml +6120 -0
  32. data/spec/dict/vcr_cassettes/wiktionary_translations_field_cassette.yml +8600 -8600
  33. data/spec/dict/vcr_cassettes/wiktionary_translations_samochod_cassette.yml +6140 -6140
  34. data/spec/dict/vcr_cassettes/wiktionary_usage_examples_kot.yml +6444 -0
  35. data/spec/dict/vcr_setup.rb +19 -19
  36. metadata +9 -4
data/lib/dict/dictionary.rb CHANGED
@@ -1,62 +1,45 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- # It is a base class for classes fetching results from Web dictionaries.
4
-
5
- require 'open-uri'
6
- require 'dict/result'
7
-
8
- module Dict
9
- class Dictionary
10
- attr_accessor :translations, :examples
11
-
12
- def initialize(word)
13
- check_arguments(word)
14
- @translations = []
15
- @examples = []
16
- @word = downcase_word(word)
17
- @result = Dict::Result.new(@word)
18
- end
19
-
20
- # Returns hash with structure as showed below
21
- # { 'TRANSLATION' => ['EXAMPLE', ...], ... }
22
- def make_hash_results(arr)
23
- hash = arr.each_slice(2).inject({}) do |h, (key, value)|
24
- if h.has_key?(key)
25
- h[key].push(value) ; h
26
- else
27
- h[key] = [value] ; h
28
- end
29
- end
30
- @translations, @examples = hash.keys, hash.values
31
- hash
32
- end
33
-
34
- # Returns an instance of URI::HTTP class.
35
- def uri(url, word = nil)
36
- word == nil ? URI(URI.escape(url)) : URI(URI.escape(url + word.downcase.tr(' ', '_')))
37
- end
38
-
39
- # Checks if word was given correctly.
40
- def check_arguments(word)
41
- raise ArgumentError.new("No given word") if word.empty?
42
- end
43
-
44
- def self.message
45
- 'There\'s no such dictionary in database.'
46
- end
47
-
48
- class ConnectError < Exception
49
- attr_reader :original
50
-
51
- def initialize(original = $!)
52
- @original = original
53
- end
54
- end
55
-
56
- private
57
- # Returns a word with all downcased letters, including polish
58
- def downcase_word(word)
59
- word.downcase.gsub(/[ĄĆĘŁŃÓŚŹŻ]/, 'Ą' => 'ą', 'Ć' => 'ć', 'Ę' => 'ę', 'Ł' => 'ł', 'Ń' => 'ń', 'Ó' => 'ó', 'Ś' => 'ś', 'Ź' => 'ź', 'Ż' => 'ż')
60
- end
61
- end
62
- end
1
+ # -*- coding: utf-8 -*-
2
+
3
+ # It is a base class for classes fetching results from Web dictionaries.
4
+
5
+ require 'open-uri'
6
+ require 'dict/result'
7
+
8
+ module Dict
9
+ class Dictionary
10
+
11
+ def initialize(word)
12
+ check_arguments(word)
13
+ @word = downcase_word(word)
14
+ @result = Dict::Result.new(@word)
15
+ end
16
+
17
+ # Returns an instance of URI::HTTP class.
18
+ def uri(url, word = nil)
19
+ word == nil ? URI(URI.escape(url)) : URI(URI.escape(url + word.downcase.tr(' ', '_')))
20
+ end
21
+
22
+ # Checks if word was given correctly.
23
+ def check_arguments(word)
24
+ raise ArgumentError.new("No given word") if word.empty?
25
+ end
26
+
27
+ def self.message
28
+ 'There\'s no such dictionary in database.'
29
+ end
30
+
31
+ class ConnectError < Exception
32
+ attr_reader :original
33
+
34
+ def initialize(original = $!)
35
+ @original = original
36
+ end
37
+ end
38
+
39
+ private
40
+ # Returns a word with all downcased letters, including polish
41
+ def downcase_word(word)
42
+ word.downcase.gsub(/[ĄĆĘŁŃÓŚŹŻ]/, 'Ą' => 'ą', 'Ć' => 'ć', 'Ę' => 'ę', 'Ł' => 'ł', 'Ń' => 'ń', 'Ó' => 'ó', 'Ś' => 'ś', 'Ź' => 'ź', 'Ż' => 'ż')
43
+ end
44
+ end
45
+ end
data/lib/dict/glosbe.rb CHANGED
@@ -1,66 +1,68 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- require 'nokogiri'
4
- require 'dict/dictionary'
5
-
6
- GLOSBE_PL = 'http://glosbe.com/pl/en/' # polish - english
7
- GLOSBE_EN = 'http://glosbe.com/en/pl/' # english - polish
8
-
9
- module Dict
10
- class Glosbe < Dictionary
11
- # returns an Dict::Result object
12
- def translate
13
- begin
14
- if is_polish?(doc = get_content(GLOSBE_PL, @word))
15
- add_translations(get_translations(doc))
16
- add_examples(get_examples(doc, 'en'))
17
- else
18
- doc = get_content(GLOSBE_EN, @word)
19
- add_translations(get_translations(doc))
20
- add_examples(get_examples(doc, 'pl'))
21
- end
22
-
23
- @result
24
- rescue OpenURI::HTTPError
25
- raise Dictionary::ConnectError
26
- end
27
- end
28
-
29
- private
30
-
31
- # checks if given word is polish
32
- def is_polish?(doc)
33
- doc.at_css('.content_box_rounded p').nil?
34
- end
35
-
36
- # returns a html structure of visited site
37
- def get_content(url, word)
38
- Nokogiri::HTML(open(uri(url, word))).css('.wordDetails')
39
- end
40
-
41
- # returns array with structure as shown below from the given dictionary link
42
- # ['TRANSLATION1', 'TRANSLATION2', ...]
43
- def get_translations(doc)
44
- doc.css('.phrase-container > .translation').each { |translation| translations.push(translation.text.downcase) }
45
- translations
46
- end
47
-
48
- # add obtained translations to Dict::Result object
49
- def add_translations(translations)
50
- translations.each { |translation| @result.add_translation(@result.term, translation) }
51
- end
52
-
53
- # returns array with structure as shown below from the given dictionary link
54
- # ['EXAMPLE1', 'EXAMPLE2', ...]
55
- # the default length of given example is 60 characters
56
- def get_examples(doc, lang, length = 60)
57
- doc.css(".tranlastionMemory td[lang=#{lang}]").each { |example| examples.push(example.text.capitalize) if example.text.length < length }
58
- examples
59
- end
60
-
61
- # add obtained examples to Dict::Result object
62
- def add_examples(examples)
63
- examples.each { |example| @result.add_example(@result.term, example) }
64
- end
65
- end
66
- end
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'nokogiri'
4
+ require 'dict/dictionary'
5
+
6
+ GLOSBE_PL = 'http://glosbe.com/pl/en/' # polish - english
7
+ GLOSBE_EN = 'http://glosbe.com/en/pl/' # english - polish
8
+
9
+ module Dict
10
+ class Glosbe < Dictionary
11
+ # returns an Dict::Result object
12
+ def translate
13
+ if is_polish?(doc = get_content(GLOSBE_PL, @word))
14
+ add_translations(get_translations(doc))
15
+ add_examples(get_examples(doc, 'en'))
16
+ else
17
+ doc = get_content(GLOSBE_EN, @word)
18
+ add_translations(get_translations(doc))
19
+ add_examples(get_examples(doc, 'pl'))
20
+ end
21
+
22
+ @result
23
+ end
24
+
25
+ private
26
+
27
+ # checks if given word is polish
28
+ def is_polish?(doc)
29
+ !doc.empty? && doc.at_css('.content_box_rounded p').nil?
30
+ end
31
+
32
+ # returns instance of Nokogiri::HTML module
33
+ def get_content(url, word)
34
+ begin
35
+ Nokogiri::HTML(open(uri(url, word))).css('.wordDetails')
36
+ rescue => e
37
+ ""
38
+ end
39
+ end
40
+
41
+ # returns array with structure as shown below from the given dictionary link
42
+ # ['TRANSLATION1', 'TRANSLATION2', ...]
43
+ def get_translations(doc)
44
+ translations = []
45
+ doc.css('.phrase-container > .translation').each { |translation| translations.push(translation.text.downcase) } if !doc.empty?
46
+ translations
47
+ end
48
+
49
+ # add obtained translations to Dict::Result object
50
+ def add_translations(translations)
51
+ translations.each { |translation| @result.add_translation(@result.term, translation) }
52
+ end
53
+
54
+ # returns array with structure as shown below from the given dictionary link
55
+ # ['EXAMPLE1', 'EXAMPLE2', ...]
56
+ # the default length of given example is 60 characters
57
+ def get_examples(doc, lang, length = 60)
58
+ examples = []
59
+ doc.css(".tranlastionMemory td[lang=#{lang}]").each { |example| examples.push(example.text.capitalize) if example.text.length < length } if !doc.empty?
60
+ examples
61
+ end
62
+
63
+ # add obtained examples to Dict::Result object
64
+ def add_examples(examples)
65
+ examples.each { |example| @result.add_example(@result.term, example) }
66
+ end
67
+ end
68
+ end
data/lib/dict/result.rb CHANGED
@@ -1,38 +1,38 @@
1
- # Objects of this class are returned by methods retrieving translations
2
- # from Web dictionaries.
3
-
4
- module Dict
5
- class Result
6
- attr_reader :term, :translations, :examples
7
-
8
- def initialize(term)
9
- @term = term
10
- @translations = {}
11
- @examples = {}
12
- end
13
-
14
- def add_translation(term, translation)
15
- add_result(@translations, term, translation)
16
- end
17
-
18
- def add_example(term, example)
19
- add_result(@examples, term, example)
20
- end
21
-
22
- def each_translation
23
- @translations.each_pair do |term,translation|
24
- yield term, translation
25
- end
26
- end
27
-
28
- private
29
- def add_result(hash, key, value)
30
- if hash.has_key?(key)
31
- hash[key].push(value)
32
- else
33
- hash.merge!({ key => [value] })
34
- end
35
- self
36
- end
37
- end
38
- end
1
+ # Objects of this class are returned by methods retrieving translations
2
+ # from Web dictionaries.
3
+
4
+ module Dict
5
+ class Result
6
+ attr_reader :term, :translations, :examples
7
+
8
+ def initialize(term)
9
+ @term = term
10
+ @translations = {}
11
+ @examples = {}
12
+ end
13
+
14
+ def add_translation(term, translation)
15
+ add_result(@translations, term, translation)
16
+ end
17
+
18
+ def add_example(term, example)
19
+ add_result(@examples, term, example)
20
+ end
21
+
22
+ def each_translation
23
+ @translations.each_pair do |term,translation|
24
+ yield term, translation
25
+ end
26
+ end
27
+
28
+ private
29
+ def add_result(hash, key, value)
30
+ if hash.has_key?(key)
31
+ hash[key].push(value)
32
+ else
33
+ hash.merge!({ key => [value] })
34
+ end
35
+ self
36
+ end
37
+ end
38
+ end
data/lib/dict/version.rb CHANGED
@@ -1,3 +1,3 @@
1
- module Dict
2
- VERSION = "0.3.3"
3
- end
1
+ module Dict
2
+ VERSION = "0.3.4"
3
+ end
data/lib/dict/wiktionary.rb CHANGED
@@ -1,91 +1,91 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- # Class fetching translations of given word from wiktionary.org.
4
-
5
- require 'nokogiri'
6
- require 'dict/dictionary'
7
-
8
- module Dict
9
- class Wiktionary < Dictionary
10
-
11
- # Returns an Dict::Result object.
12
- def translate
13
- translations.each do |translation|
14
- @result.add_translation(@result.term, translation.gsub(/(\s[^|\s]+\|)/,' '))
15
- examples(translation).each { |example| @result.add_example(translation, example) }
16
- end
17
-
18
- @result
19
- end
20
-
21
- def get_html(url)
22
- begin
23
- Nokogiri::HTML(open(URI.encode(url)))
24
- rescue OpenURI::HTTPError
25
- raise Dictionary::ConnectError
26
- end
27
- end
28
-
29
- private
30
- def polish?(content)
31
- ! /==Polish==/i.match(content).nil?
32
- end
33
-
34
- # Returns an array containing translations.
35
- def translations
36
- url_pl = "http://en.wiktionary.org/w/index.php?title=#{@word}&action=edit"
37
- url_en = "http://pl.wiktionary.org/w/index.php?title=#{@word}&action=edit"
38
-
39
- content_pl = get_html(url_pl).css('textarea#wpTextbox1').first
40
- if polish?(content_pl)
41
- @is_polish = true
42
- extract_polish_translations(content_pl)
43
- else
44
- @is_polish = false
45
- extract_english_translations(get_html(url_en).css('textarea#wpTextbox1').first.content)
46
- end
47
- end
48
-
49
- # Returns an array containing polish translations.
50
- def extract_polish_translations(content)
51
- translations = /Noun[^\{]+\{\{(?:head\|pl|pl\-noun)[^#]+#\s*\[\[([^\n]+)/.match(content)
52
- translations = (translations && translations[1].gsub(/\[|\]/,'').split(', ')) || []
53
- end
54
-
55
- # Returns an array containing english translations.
56
- def extract_english_translations(content)
57
- translations_block = /język\s+angielski(?:.|\n)+\{\{znaczenia\}\}(.|\n)+(?:\{\{odmiana){1,}/.match(content)
58
- return [] unless translations_block.instance_of?(MatchData)
59
- translations_block = translations_block[0].gsub(/odmiana(.|\n)+$/,'')
60
- translations = translations_block.scan(/:\s*\(\d\.?\d?\)\s*([^\n]+)/)
61
- translations.map! do |translation|
62
- translation[0].gsub(/\[|\]|\{\{[^\}]+\}\}|'|<.*/,'').strip
63
- end
64
- translations.delete_if(&:empty?)
65
- translations ||= []
66
- end
67
-
68
- def examples(word)
69
- url_pl = "http://pl.wiktionary.org/w/index.php?title=#{word}&action=edit"
70
-
71
- if @is_polish
72
- extract_english_examples(word)
73
- else
74
- []
75
- end
76
- end
77
-
78
- # Returns an array containing usage examples of translated polish word to english.
79
- def extract_english_examples(word)
80
- word = word.gsub(/\s+\(.+$/,'') || ''
81
- url_en = "http://en.wiktionary.org/w/index.php?title=#{word}&action=edit"
82
- examples = /Noun[^\{]+\{\{en\-noun[^=]+/.match(get_html(url_en.gsub('{word}',word)).css('textarea#wpTextbox1').first)
83
- return [] unless examples.instance_of?(MatchData)
84
- examples = examples[0].scan(/#: ''([^\n]+)\n/)
85
- examples.map! do |translation|
86
- translation[0].gsub(/'{2,}/,'')
87
- end
88
- examples
89
- end
90
- end
91
- end
1
+ # -*- coding: utf-8 -*-
2
+
3
+ # Class fetching translations of given word from wiktionary.org.
4
+
5
+ require 'nokogiri'
6
+ require 'dict/dictionary'
7
+
8
+ module Dict
9
+ class Wiktionary < Dictionary
10
+
11
+ # Returns an Dict::Result object.
12
+ def translate
13
+ translations.each do |translation|
14
+ @result.add_translation(@result.term, translation.gsub(/(\s[^|\s]+\|)/,' '))
15
+ examples(translation).each { |example| @result.add_example(translation, example) }
16
+ end
17
+
18
+ @result
19
+ end
20
+
21
+ def get_html(url)
22
+ begin
23
+ Nokogiri::HTML(open(URI.encode(url)))
24
+ rescue OpenURI::HTTPError
25
+ raise Dictionary::ConnectError
26
+ end
27
+ end
28
+
29
+ private
30
+ def polish?(content)
31
+ ! /==Polish==/i.match(content).nil?
32
+ end
33
+
34
+ # Returns an array containing translations.
35
+ def translations
36
+ url_pl = "http://en.wiktionary.org/w/index.php?title=#{@word}&action=edit"
37
+ url_en = "http://pl.wiktionary.org/w/index.php?title=#{@word}&action=edit"
38
+
39
+ content_pl = get_html(url_pl).css('textarea#wpTextbox1').first
40
+ if polish?(content_pl)
41
+ @is_polish = true
42
+ extract_polish_translations(content_pl)
43
+ else
44
+ @is_polish = false
45
+ extract_english_translations(get_html(url_en).css('textarea#wpTextbox1').first.content)
46
+ end
47
+ end
48
+
49
+ # Returns an array containing polish translations.
50
+ def extract_polish_translations(content)
51
+ translations = /Noun[^\{]+\{\{(?:head\|pl|pl\-noun)[^#]+#\s*\[\[([^\n]+)/.match(content)
52
+ translations = (translations && translations[1].gsub(/\[|\]/,'').split(', ')) || []
53
+ end
54
+
55
+ # Returns an array containing english translations.
56
+ def extract_english_translations(content)
57
+ translations_block = /język\s+angielski(?:.|\n)+\{\{znaczenia\}\}(.|\n)+(?:\{\{odmiana){1,}/.match(content)
58
+ return [] unless translations_block.instance_of?(MatchData)
59
+ translations_block = translations_block[0].gsub(/odmiana(.|\n)+$/,'')
60
+ translations = translations_block.scan(/:\s*\(\d\.?\d?\)\s*([^\n]+)/)
61
+ translations.map! do |translation|
62
+ translation[0].gsub(/\[|\]|\{\{[^\}]+\}\}|'|<.*/,'').strip
63
+ end
64
+ translations.delete_if(&:empty?)
65
+ translations ||= []
66
+ end
67
+
68
+ def examples(word)
69
+ url_pl = "http://pl.wiktionary.org/w/index.php?title=#{word}&action=edit"
70
+
71
+ if @is_polish
72
+ extract_english_examples(word)
73
+ else
74
+ []
75
+ end
76
+ end
77
+
78
+ # Returns an array containing usage examples of translated polish word to english.
79
+ def extract_english_examples(word)
80
+ word = word.gsub(/\s+\(.+$/,'') || ''
81
+ url_en = "http://en.wiktionary.org/w/index.php?title=#{word}&action=edit"
82
+ examples = /Noun[^\{]+\{\{en\-noun[^=]+/.match(get_html(url_en.gsub('{word}',word)).css('textarea#wpTextbox1').first)
83
+ return [] unless examples.instance_of?(MatchData)
84
+ examples = examples[0].scan(/#: ''([^\n]+)\n/)
85
+ examples.map! do |translation|
86
+ translation[0].gsub(/'{2,}/,'')
87
+ end
88
+ examples
89
+ end
90
+ end
91
+ end
data/lib/dict.rb CHANGED
@@ -1,5 +1,5 @@
1
- require 'dict/dict'
2
-
3
- module Dict
4
- autoload :VERSION, "dict/version"
5
- end
1
+ require 'dict/dict'
2
+
3
+ module Dict
4
+ autoload :VERSION, "dict/version"
5
+ end