dict 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. data/.gitignore +1 -1
  2. data/.rspec +2 -2
  3. data/.travis.yml +14 -14
  4. data/Gemfile +4 -4
  5. data/Gemfile.lock +40 -40
  6. data/README.md +34 -34
  7. data/Rakefile +8 -8
  8. data/bin/dict +7 -7
  9. data/dict.gemspec +28 -28
  10. data/lib/dict/cli/runner.rb +137 -115
  11. data/lib/dict/dict.rb +40 -53
  12. data/lib/dict/dictionary.rb +45 -62
  13. data/lib/dict/glosbe.rb +68 -66
  14. data/lib/dict/result.rb +38 -38
  15. data/lib/dict/version.rb +3 -3
  16. data/lib/dict/wiktionary.rb +91 -91
  17. data/lib/dict.rb +5 -5
  18. data/spec/dict/lib_dict_cli_runner_spec.rb +120 -120
  19. data/spec/dict/lib_dict_spec.rb +39 -39
  20. data/spec/dict/lib_glosbe_spec.rb +48 -34
  21. data/spec/dict/lib_wiktionary_spec.rb +68 -62
  22. data/spec/dict/spec_helper.rb +17 -17
  23. data/spec/dict/vcr_cassettes/glosbe_translations_asdfff_cassette.yml +1518 -0
  24. data/spec/dict/vcr_cassettes/glosbe_translations_atomic_cassette.yml +2794 -2794
  25. data/spec/dict/vcr_cassettes/glosbe_translations_usage_cassette.yml +2530 -0
  26. data/spec/dict/vcr_cassettes/glosbe_translations_woda_cassette.yml +1949 -1949
  27. data/spec/dict/vcr_cassettes/slowik_runner_cassette.yml +4178 -4178
  28. data/spec/dict/vcr_cassettes/translations_dragon_cassette.yml +8659 -8659
  29. data/spec/dict/vcr_cassettes/translations_slownik_cassette.yml +4177 -4177
  30. data/spec/dict/vcr_cassettes/wiktionary_no_usage_examples.yml +8634 -0
  31. data/spec/dict/vcr_cassettes/wiktionary_translate_result_uppercase.yml +6120 -0
  32. data/spec/dict/vcr_cassettes/wiktionary_translations_field_cassette.yml +8600 -8600
  33. data/spec/dict/vcr_cassettes/wiktionary_translations_samochod_cassette.yml +6140 -6140
  34. data/spec/dict/vcr_cassettes/wiktionary_usage_examples_kot.yml +6444 -0
  35. data/spec/dict/vcr_setup.rb +19 -19
  36. metadata +9 -4
data/lib/dict/dictionary.rb CHANGED
@@ -1,62 +1,45 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- # It is a base class for classes fetching results from Web dictionaries.
4
-
5
- require 'open-uri'
6
- require 'dict/result'
7
-
8
- module Dict
9
- class Dictionary
10
- attr_accessor :translations, :examples
11
-
12
- def initialize(word)
13
- check_arguments(word)
14
- @translations = []
15
- @examples = []
16
- @word = downcase_word(word)
17
- @result = Dict::Result.new(@word)
18
- end
19
-
20
- # Returns hash with structure as showed below
21
- # { 'TRANSLATION' => ['EXAMPLE', ...], ... }
22
- def make_hash_results(arr)
23
- hash = arr.each_slice(2).inject({}) do |h, (key, value)|
24
- if h.has_key?(key)
25
- h[key].push(value) ; h
26
- else
27
- h[key] = [value] ; h
28
- end
29
- end
30
- @translations, @examples = hash.keys, hash.values
31
- hash
32
- end
33
-
34
- # Returns an instance of URI::HTTP class.
35
- def uri(url, word = nil)
36
- word == nil ? URI(URI.escape(url)) : URI(URI.escape(url + word.downcase.tr(' ', '_')))
37
- end
38
-
39
- # Checks if word was given correctly.
40
- def check_arguments(word)
41
- raise ArgumentError.new("No given word") if word.empty?
42
- end
43
-
44
- def self.message
45
- 'There\'s no such dictionary in database.'
46
- end
47
-
48
- class ConnectError < Exception
49
- attr_reader :original
50
-
51
- def initialize(original = $!)
52
- @original = original
53
- end
54
- end
55
-
56
- private
57
- # Returns a word with all downcased letters, including polish
58
- def downcase_word(word)
59
- word.downcase.gsub(/[ĄĆĘŁŃÓŚŹŻ]/, 'Ą' => 'ą', 'Ć' => 'ć', 'Ę' => 'ę', 'Ł' => 'ł', 'Ń' => 'ń', 'Ó' => 'ó', 'Ś' => 'ś', 'Ź' => 'ź', 'Ż' => 'ż')
60
- end
61
- end
62
- end
1
+ # -*- coding: utf-8 -*-
2
+
3
+ # It is a base class for classes fetching results from Web dictionaries.
4
+
5
+ require 'open-uri'
6
+ require 'dict/result'
7
+
8
+ module Dict
9
+ class Dictionary
10
+
11
+ def initialize(word)
12
+ check_arguments(word)
13
+ @word = downcase_word(word)
14
+ @result = Dict::Result.new(@word)
15
+ end
16
+
17
+ # Returns an instance of URI::HTTP class.
18
+ def uri(url, word = nil)
19
+ word == nil ? URI(URI.escape(url)) : URI(URI.escape(url + word.downcase.tr(' ', '_')))
20
+ end
21
+
22
+ # Checks if word was given correctly.
23
+ def check_arguments(word)
24
+ raise ArgumentError.new("No given word") if word.empty?
25
+ end
26
+
27
+ def self.message
28
+ 'There\'s no such dictionary in database.'
29
+ end
30
+
31
+ class ConnectError < Exception
32
+ attr_reader :original
33
+
34
+ def initialize(original = $!)
35
+ @original = original
36
+ end
37
+ end
38
+
39
+ private
40
+ # Returns a word with all downcased letters, including polish
41
+ def downcase_word(word)
42
+ word.downcase.gsub(/[ĄĆĘŁŃÓŚŹŻ]/, 'Ą' => 'ą', 'Ć' => 'ć', 'Ę' => 'ę', 'Ł' => 'ł', 'Ń' => 'ń', 'Ó' => 'ó', 'Ś' => 'ś', 'Ź' => 'ź', 'Ż' => 'ż')
43
+ end
44
+ end
45
+ end
data/lib/dict/glosbe.rb CHANGED
@@ -1,66 +1,68 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- require 'nokogiri'
4
- require 'dict/dictionary'
5
-
6
- GLOSBE_PL = 'http://glosbe.com/pl/en/' # polish - english
7
- GLOSBE_EN = 'http://glosbe.com/en/pl/' # english - polish
8
-
9
- module Dict
10
- class Glosbe < Dictionary
11
- # returns an Dict::Result object
12
- def translate
13
- begin
14
- if is_polish?(doc = get_content(GLOSBE_PL, @word))
15
- add_translations(get_translations(doc))
16
- add_examples(get_examples(doc, 'en'))
17
- else
18
- doc = get_content(GLOSBE_EN, @word)
19
- add_translations(get_translations(doc))
20
- add_examples(get_examples(doc, 'pl'))
21
- end
22
-
23
- @result
24
- rescue OpenURI::HTTPError
25
- raise Dictionary::ConnectError
26
- end
27
- end
28
-
29
- private
30
-
31
- # checks if given word is polish
32
- def is_polish?(doc)
33
- doc.at_css('.content_box_rounded p').nil?
34
- end
35
-
36
- # returns a html structure of visited site
37
- def get_content(url, word)
38
- Nokogiri::HTML(open(uri(url, word))).css('.wordDetails')
39
- end
40
-
41
- # returns array with structure as shown below from the given dictionary link
42
- # ['TRANSLATION1', 'TRANSLATION2', ...]
43
- def get_translations(doc)
44
- doc.css('.phrase-container > .translation').each { |translation| translations.push(translation.text.downcase) }
45
- translations
46
- end
47
-
48
- # add obtained translations to Dict::Result object
49
- def add_translations(translations)
50
- translations.each { |translation| @result.add_translation(@result.term, translation) }
51
- end
52
-
53
- # returns array with structure as shown below from the given dictionary link
54
- # ['EXAMPLE1', 'EXAMPLE2', ...]
55
- # the default length of given example is 60 characters
56
- def get_examples(doc, lang, length = 60)
57
- doc.css(".tranlastionMemory td[lang=#{lang}]").each { |example| examples.push(example.text.capitalize) if example.text.length < length }
58
- examples
59
- end
60
-
61
- # add obtained examples to Dict::Result object
62
- def add_examples(examples)
63
- examples.each { |example| @result.add_example(@result.term, example) }
64
- end
65
- end
66
- end
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'nokogiri'
4
+ require 'dict/dictionary'
5
+
6
+ GLOSBE_PL = 'http://glosbe.com/pl/en/' # polish - english
7
+ GLOSBE_EN = 'http://glosbe.com/en/pl/' # english - polish
8
+
9
+ module Dict
10
+ class Glosbe < Dictionary
11
+ # returns an Dict::Result object
12
+ def translate
13
+ if is_polish?(doc = get_content(GLOSBE_PL, @word))
14
+ add_translations(get_translations(doc))
15
+ add_examples(get_examples(doc, 'en'))
16
+ else
17
+ doc = get_content(GLOSBE_EN, @word)
18
+ add_translations(get_translations(doc))
19
+ add_examples(get_examples(doc, 'pl'))
20
+ end
21
+
22
+ @result
23
+ end
24
+
25
+ private
26
+
27
+ # checks if given word is polish
28
+ def is_polish?(doc)
29
+ !doc.empty? && doc.at_css('.content_box_rounded p').nil?
30
+ end
31
+
32
+ # returns instance of Nokogiri::HTML module
33
+ def get_content(url, word)
34
+ begin
35
+ Nokogiri::HTML(open(uri(url, word))).css('.wordDetails')
36
+ rescue => e
37
+ ""
38
+ end
39
+ end
40
+
41
+ # returns array with structure as shown below from the given dictionary link
42
+ # ['TRANSLATION1', 'TRANSLATION2', ...]
43
+ def get_translations(doc)
44
+ translations = []
45
+ doc.css('.phrase-container > .translation').each { |translation| translations.push(translation.text.downcase) } if !doc.empty?
46
+ translations
47
+ end
48
+
49
+ # add obtained translations to Dict::Result object
50
+ def add_translations(translations)
51
+ translations.each { |translation| @result.add_translation(@result.term, translation) }
52
+ end
53
+
54
+ # returns array with structure as shown below from the given dictionary link
55
+ # ['EXAMPLE1', 'EXAMPLE2', ...]
56
+ # the default length of given example is 60 characters
57
+ def get_examples(doc, lang, length = 60)
58
+ examples = []
59
+ doc.css(".tranlastionMemory td[lang=#{lang}]").each { |example| examples.push(example.text.capitalize) if example.text.length < length } if !doc.empty?
60
+ examples
61
+ end
62
+
63
+ # add obtained examples to Dict::Result object
64
+ def add_examples(examples)
65
+ examples.each { |example| @result.add_example(@result.term, example) }
66
+ end
67
+ end
68
+ end
data/lib/dict/result.rb CHANGED
@@ -1,38 +1,38 @@
1
- # Objects of this class are returned by methods retrieving translations
2
- # from Web dictionaries.
3
-
4
- module Dict
5
- class Result
6
- attr_reader :term, :translations, :examples
7
-
8
- def initialize(term)
9
- @term = term
10
- @translations = {}
11
- @examples = {}
12
- end
13
-
14
- def add_translation(term, translation)
15
- add_result(@translations, term, translation)
16
- end
17
-
18
- def add_example(term, example)
19
- add_result(@examples, term, example)
20
- end
21
-
22
- def each_translation
23
- @translations.each_pair do |term,translation|
24
- yield term, translation
25
- end
26
- end
27
-
28
- private
29
- def add_result(hash, key, value)
30
- if hash.has_key?(key)
31
- hash[key].push(value)
32
- else
33
- hash.merge!({ key => [value] })
34
- end
35
- self
36
- end
37
- end
38
- end
1
+ # Objects of this class are returned by methods retrieving translations
2
+ # from Web dictionaries.
3
+
4
+ module Dict
5
+ class Result
6
+ attr_reader :term, :translations, :examples
7
+
8
+ def initialize(term)
9
+ @term = term
10
+ @translations = {}
11
+ @examples = {}
12
+ end
13
+
14
+ def add_translation(term, translation)
15
+ add_result(@translations, term, translation)
16
+ end
17
+
18
+ def add_example(term, example)
19
+ add_result(@examples, term, example)
20
+ end
21
+
22
+ def each_translation
23
+ @translations.each_pair do |term,translation|
24
+ yield term, translation
25
+ end
26
+ end
27
+
28
+ private
29
+ def add_result(hash, key, value)
30
+ if hash.has_key?(key)
31
+ hash[key].push(value)
32
+ else
33
+ hash.merge!({ key => [value] })
34
+ end
35
+ self
36
+ end
37
+ end
38
+ end
data/lib/dict/version.rb CHANGED
@@ -1,3 +1,3 @@
1
- module Dict
2
- VERSION = "0.3.3"
3
- end
1
+ module Dict
2
+ VERSION = "0.3.4"
3
+ end
data/lib/dict/wiktionary.rb CHANGED
@@ -1,91 +1,91 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- # Class fetching translations of given word from wiktionary.org.
4
-
5
- require 'nokogiri'
6
- require 'dict/dictionary'
7
-
8
- module Dict
9
- class Wiktionary < Dictionary
10
-
11
- # Returns an Dict::Result object.
12
- def translate
13
- translations.each do |translation|
14
- @result.add_translation(@result.term, translation.gsub(/(\s[^|\s]+\|)/,' '))
15
- examples(translation).each { |example| @result.add_example(translation, example) }
16
- end
17
-
18
- @result
19
- end
20
-
21
- def get_html(url)
22
- begin
23
- Nokogiri::HTML(open(URI.encode(url)))
24
- rescue OpenURI::HTTPError
25
- raise Dictionary::ConnectError
26
- end
27
- end
28
-
29
- private
30
- def polish?(content)
31
- ! /==Polish==/i.match(content).nil?
32
- end
33
-
34
- # Returns an array containing translations.
35
- def translations
36
- url_pl = "http://en.wiktionary.org/w/index.php?title=#{@word}&action=edit"
37
- url_en = "http://pl.wiktionary.org/w/index.php?title=#{@word}&action=edit"
38
-
39
- content_pl = get_html(url_pl).css('textarea#wpTextbox1').first
40
- if polish?(content_pl)
41
- @is_polish = true
42
- extract_polish_translations(content_pl)
43
- else
44
- @is_polish = false
45
- extract_english_translations(get_html(url_en).css('textarea#wpTextbox1').first.content)
46
- end
47
- end
48
-
49
- # Returns an array containing polish translations.
50
- def extract_polish_translations(content)
51
- translations = /Noun[^\{]+\{\{(?:head\|pl|pl\-noun)[^#]+#\s*\[\[([^\n]+)/.match(content)
52
- translations = (translations && translations[1].gsub(/\[|\]/,'').split(', ')) || []
53
- end
54
-
55
- # Returns an array containing english translations.
56
- def extract_english_translations(content)
57
- translations_block = /język\s+angielski(?:.|\n)+\{\{znaczenia\}\}(.|\n)+(?:\{\{odmiana){1,}/.match(content)
58
- return [] unless translations_block.instance_of?(MatchData)
59
- translations_block = translations_block[0].gsub(/odmiana(.|\n)+$/,'')
60
- translations = translations_block.scan(/:\s*\(\d\.?\d?\)\s*([^\n]+)/)
61
- translations.map! do |translation|
62
- translation[0].gsub(/\[|\]|\{\{[^\}]+\}\}|'|<.*/,'').strip
63
- end
64
- translations.delete_if(&:empty?)
65
- translations ||= []
66
- end
67
-
68
- def examples(word)
69
- url_pl = "http://pl.wiktionary.org/w/index.php?title=#{word}&action=edit"
70
-
71
- if @is_polish
72
- extract_english_examples(word)
73
- else
74
- []
75
- end
76
- end
77
-
78
- # Returns an array containing usage examples of translated polish word to english.
79
- def extract_english_examples(word)
80
- word = word.gsub(/\s+\(.+$/,'') || ''
81
- url_en = "http://en.wiktionary.org/w/index.php?title=#{word}&action=edit"
82
- examples = /Noun[^\{]+\{\{en\-noun[^=]+/.match(get_html(url_en.gsub('{word}',word)).css('textarea#wpTextbox1').first)
83
- return [] unless examples.instance_of?(MatchData)
84
- examples = examples[0].scan(/#: ''([^\n]+)\n/)
85
- examples.map! do |translation|
86
- translation[0].gsub(/'{2,}/,'')
87
- end
88
- examples
89
- end
90
- end
91
- end
1
+ # -*- coding: utf-8 -*-
2
+
3
+ # Class fetching translations of given word from wiktionary.org.
4
+
5
+ require 'nokogiri'
6
+ require 'dict/dictionary'
7
+
8
+ module Dict
9
+ class Wiktionary < Dictionary
10
+
11
+ # Returns an Dict::Result object.
12
+ def translate
13
+ translations.each do |translation|
14
+ @result.add_translation(@result.term, translation.gsub(/(\s[^|\s]+\|)/,' '))
15
+ examples(translation).each { |example| @result.add_example(translation, example) }
16
+ end
17
+
18
+ @result
19
+ end
20
+
21
+ def get_html(url)
22
+ begin
23
+ Nokogiri::HTML(open(URI.encode(url)))
24
+ rescue OpenURI::HTTPError
25
+ raise Dictionary::ConnectError
26
+ end
27
+ end
28
+
29
+ private
30
+ def polish?(content)
31
+ ! /==Polish==/i.match(content).nil?
32
+ end
33
+
34
+ # Returns an array containing translations.
35
+ def translations
36
+ url_pl = "http://en.wiktionary.org/w/index.php?title=#{@word}&action=edit"
37
+ url_en = "http://pl.wiktionary.org/w/index.php?title=#{@word}&action=edit"
38
+
39
+ content_pl = get_html(url_pl).css('textarea#wpTextbox1').first
40
+ if polish?(content_pl)
41
+ @is_polish = true
42
+ extract_polish_translations(content_pl)
43
+ else
44
+ @is_polish = false
45
+ extract_english_translations(get_html(url_en).css('textarea#wpTextbox1').first.content)
46
+ end
47
+ end
48
+
49
+ # Returns an array containing polish translations.
50
+ def extract_polish_translations(content)
51
+ translations = /Noun[^\{]+\{\{(?:head\|pl|pl\-noun)[^#]+#\s*\[\[([^\n]+)/.match(content)
52
+ translations = (translations && translations[1].gsub(/\[|\]/,'').split(', ')) || []
53
+ end
54
+
55
+ # Returns an array containing english translations.
56
+ def extract_english_translations(content)
57
+ translations_block = /język\s+angielski(?:.|\n)+\{\{znaczenia\}\}(.|\n)+(?:\{\{odmiana){1,}/.match(content)
58
+ return [] unless translations_block.instance_of?(MatchData)
59
+ translations_block = translations_block[0].gsub(/odmiana(.|\n)+$/,'')
60
+ translations = translations_block.scan(/:\s*\(\d\.?\d?\)\s*([^\n]+)/)
61
+ translations.map! do |translation|
62
+ translation[0].gsub(/\[|\]|\{\{[^\}]+\}\}|'|<.*/,'').strip
63
+ end
64
+ translations.delete_if(&:empty?)
65
+ translations ||= []
66
+ end
67
+
68
+ def examples(word)
69
+ url_pl = "http://pl.wiktionary.org/w/index.php?title=#{word}&action=edit"
70
+
71
+ if @is_polish
72
+ extract_english_examples(word)
73
+ else
74
+ []
75
+ end
76
+ end
77
+
78
+ # Returns an array containing usage examples of translated polish word to english.
79
+ def extract_english_examples(word)
80
+ word = word.gsub(/\s+\(.+$/,'') || ''
81
+ url_en = "http://en.wiktionary.org/w/index.php?title=#{word}&action=edit"
82
+ examples = /Noun[^\{]+\{\{en\-noun[^=]+/.match(get_html(url_en.gsub('{word}',word)).css('textarea#wpTextbox1').first)
83
+ return [] unless examples.instance_of?(MatchData)
84
+ examples = examples[0].scan(/#: ''([^\n]+)\n/)
85
+ examples.map! do |translation|
86
+ translation[0].gsub(/'{2,}/,'')
87
+ end
88
+ examples
89
+ end
90
+ end
91
+ end
data/lib/dict.rb CHANGED
@@ -1,5 +1,5 @@
1
- require 'dict/dict'
2
-
3
- module Dict
4
- autoload :VERSION, "dict/version"
5
- end
1
+ require 'dict/dict'
2
+
3
+ module Dict
4
+ autoload :VERSION, "dict/version"
5
+ end