dict 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -1
- data/.rspec +2 -2
- data/.travis.yml +14 -14
- data/Gemfile +4 -4
- data/Gemfile.lock +40 -40
- data/README.md +34 -34
- data/Rakefile +8 -8
- data/bin/dict +7 -7
- data/dict.gemspec +28 -28
- data/lib/dict/cli/runner.rb +137 -115
- data/lib/dict/dict.rb +40 -53
- data/lib/dict/dictionary.rb +45 -62
- data/lib/dict/glosbe.rb +68 -66
- data/lib/dict/result.rb +38 -38
- data/lib/dict/version.rb +3 -3
- data/lib/dict/wiktionary.rb +91 -91
- data/lib/dict.rb +5 -5
- data/spec/dict/lib_dict_cli_runner_spec.rb +120 -120
- data/spec/dict/lib_dict_spec.rb +39 -39
- data/spec/dict/lib_glosbe_spec.rb +48 -34
- data/spec/dict/lib_wiktionary_spec.rb +68 -62
- data/spec/dict/spec_helper.rb +17 -17
- data/spec/dict/vcr_cassettes/glosbe_translations_asdfff_cassette.yml +1518 -0
- data/spec/dict/vcr_cassettes/glosbe_translations_atomic_cassette.yml +2794 -2794
- data/spec/dict/vcr_cassettes/glosbe_translations_usage_cassette.yml +2530 -0
- data/spec/dict/vcr_cassettes/glosbe_translations_woda_cassette.yml +1949 -1949
- data/spec/dict/vcr_cassettes/slowik_runner_cassette.yml +4178 -4178
- data/spec/dict/vcr_cassettes/translations_dragon_cassette.yml +8659 -8659
- data/spec/dict/vcr_cassettes/translations_slownik_cassette.yml +4177 -4177
- data/spec/dict/vcr_cassettes/wiktionary_no_usage_examples.yml +8634 -0
- data/spec/dict/vcr_cassettes/wiktionary_translate_result_uppercase.yml +6120 -0
- data/spec/dict/vcr_cassettes/wiktionary_translations_field_cassette.yml +8600 -8600
- data/spec/dict/vcr_cassettes/wiktionary_translations_samochod_cassette.yml +6140 -6140
- data/spec/dict/vcr_cassettes/wiktionary_usage_examples_kot.yml +6444 -0
- data/spec/dict/vcr_setup.rb +19 -19
- metadata +9 -4
data/lib/dict/dictionary.rb
CHANGED
@@ -1,62 +1,45 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
|
3
|
-
# It is a base class for classes fetching results from Web dictionaries.
|
4
|
-
|
5
|
-
require 'open-uri'
|
6
|
-
require 'dict/result'
|
7
|
-
|
8
|
-
module Dict
|
9
|
-
class Dictionary
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
@
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
end
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
end
|
47
|
-
|
48
|
-
class ConnectError < Exception
|
49
|
-
attr_reader :original
|
50
|
-
|
51
|
-
def initialize(original = $!)
|
52
|
-
@original = original
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
private
|
57
|
-
# Returns a word with all downcased letters, including polish
|
58
|
-
def downcase_word(word)
|
59
|
-
word.downcase.gsub(/[ĄĆĘŁŃÓŚŹŻ]/, 'Ą' => 'ą', 'Ć' => 'ć', 'Ę' => 'ę', 'Ł' => 'ł', 'Ń' => 'ń', 'Ó' => 'ó', 'Ś' => 'ś', 'Ź' => 'ź', 'Ż' => 'ż')
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
# It is a base class for classes fetching results from Web dictionaries.
|
4
|
+
|
5
|
+
require 'open-uri'
|
6
|
+
require 'dict/result'
|
7
|
+
|
8
|
+
module Dict
  # Base class for classes fetching results from Web dictionaries.
  #
  # Keeps the normalized (downcased) query in @word and a Dict::Result
  # created for that query in @result.
  class Dictionary

    # word - the term to look up.
    #
    # Raises ArgumentError when word is nil or empty.
    def initialize(word)
      check_arguments(word)
      @word = downcase_word(word)
      @result = Dict::Result.new(@word)
    end

    # Builds a URI object (URI::HTTP for http:// addresses).
    #
    # url  - base address; characters not allowed in a URI are percent-escaped.
    # word - optional term appended to url after downcasing it and replacing
    #        spaces with underscores.
    def uri(url, word = nil)
      address = word.nil? ? url : url + word.downcase.tr(' ', '_')
      # URI.escape was removed in Ruby 3.0; the default parser still offers
      # the escaping it used to delegate to.
      URI(URI::DEFAULT_PARSER.escape(address))
    end

    # Checks if word was given correctly.
    #
    # Raises ArgumentError when word is nil or empty.
    def check_arguments(word)
      raise ArgumentError, "No given word" if word.nil? || word.empty?
    end

    # Returns the message describing an unknown dictionary.
    def self.message
      'There\'s no such dictionary in database.'
    end

    # Error carrying the exception that caused it in #original; by default
    # that is $!, the exception being handled when the error is created.
    #
    # NOTE(review): this inherits from Exception, so a bare `rescue` does not
    # catch it. Kept as is for compatibility - confirm before switching to
    # StandardError.
    class ConnectError < Exception
      attr_reader :original

      def initialize(original = $!)
        @original = original
      end
    end

    private

    # Returns a word with all downcased letters, including polish.
    def downcase_word(word)
      # String#downcase only folds ASCII letters on older Rubies, so the
      # Polish capitals are mapped explicitly.
      word.downcase.tr('ĄĆĘŁŃÓŚŹŻ', 'ąćęłńóśźż')
    end
  end
end
|
data/lib/dict/glosbe.rb
CHANGED
@@ -1,66 +1,68 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
|
3
|
-
require 'nokogiri'
|
4
|
-
require 'dict/dictionary'
|
5
|
-
|
6
|
-
GLOSBE_PL = 'http://glosbe.com/pl/en/' # polish - english
|
7
|
-
GLOSBE_EN = 'http://glosbe.com/en/pl/' # english - polish
|
8
|
-
|
9
|
-
module Dict
|
10
|
-
class Glosbe < Dictionary
|
11
|
-
# returns an Dict::Result object
|
12
|
-
def translate
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
doc
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
end
|
40
|
-
|
41
|
-
# returns array with structure as shown below from the given dictionary link
|
42
|
-
# ['TRANSLATION1', 'TRANSLATION2', ...]
|
43
|
-
def get_translations(doc)
|
44
|
-
|
45
|
-
translations
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
#
|
55
|
-
#
|
56
|
-
|
57
|
-
|
58
|
-
examples
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
end
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'dict/dictionary'
|
5
|
+
|
6
|
+
GLOSBE_PL = 'http://glosbe.com/pl/en/' # polish - english
|
7
|
+
GLOSBE_EN = 'http://glosbe.com/en/pl/' # english - polish
|
8
|
+
|
9
|
+
module Dict
  # Fetches translations and usage examples of a word from glosbe.com.
  class Glosbe < Dictionary
    # Looks @word up in the polish - english dictionary first and falls back
    # to the english - polish one when the word is not recognised as polish.
    #
    # Returns the Dict::Result object filled with what was found.
    def translate
      doc = get_content(GLOSBE_PL, @word)
      examples_lang = 'en'

      unless is_polish?(doc)
        doc = get_content(GLOSBE_EN, @word)
        examples_lang = 'pl'
      end

      add_translations(get_translations(doc))
      add_examples(get_examples(doc, examples_lang))

      @result
    end

    private

    # Checks if given word is polish: the fetched content must be non-empty
    # and must not contain a '.content_box_rounded p' element.
    # NOTE(review): presumably that element is the "no results" notice -
    # verify against the live page markup.
    def is_polish?(doc)
      !doc.empty? && doc.at_css('.content_box_rounded p').nil?
    end

    # Returns the '.wordDetails' nodes of the page for word, or an empty
    # string when the page could not be fetched or parsed.
    def get_content(url, word)
      # URI#open comes from open-uri; Kernel#open stopped accepting URIs in
      # Ruby 3.0. The block form closes the downloaded stream.
      page = uri(url, word).open { |io| Nokogiri::HTML(io) }
      page.css('.wordDetails')
    rescue StandardError
      # Best effort: every failure is reported as "no content".
      ""
    end

    # Returns array with structure as shown below from the given content
    # ['TRANSLATION1', 'TRANSLATION2', ...]
    def get_translations(doc)
      return [] if doc.empty?

      doc.css('.phrase-container > .translation').map { |node| node.text.downcase }
    end

    # Adds obtained translations to the Dict::Result object.
    def add_translations(translations)
      translations.each { |translation| @result.add_translation(@result.term, translation) }
    end

    # Returns array with structure as shown below from the given content
    # ['EXAMPLE1', 'EXAMPLE2', ...]
    #
    # lang   - value of the lang attribute of the table cells to read.
    # length - only examples shorter than this many characters are kept
    #          (60 by default).
    def get_examples(doc, lang, length = 60)
      return [] if doc.empty?

      texts = doc.css(".tranlastionMemory td[lang=#{lang}]").map(&:text)
      texts.select { |text| text.length < length }.map(&:capitalize)
    end

    # Adds obtained examples to the Dict::Result object.
    def add_examples(examples)
      examples.each { |example| @result.add_example(@result.term, example) }
    end
  end
end
|
data/lib/dict/result.rb
CHANGED
@@ -1,38 +1,38 @@
|
|
1
|
-
# Objects of this class are returned by methods retrieving translations
|
2
|
-
# from Web dictionaries.
|
3
|
-
|
4
|
-
module Dict
|
5
|
-
class Result
|
6
|
-
attr_reader :term, :translations, :examples
|
7
|
-
|
8
|
-
def initialize(term)
|
9
|
-
@term = term
|
10
|
-
@translations = {}
|
11
|
-
@examples = {}
|
12
|
-
end
|
13
|
-
|
14
|
-
def add_translation(term, translation)
|
15
|
-
add_result(@translations, term, translation)
|
16
|
-
end
|
17
|
-
|
18
|
-
def add_example(term, example)
|
19
|
-
add_result(@examples, term, example)
|
20
|
-
end
|
21
|
-
|
22
|
-
def each_translation
|
23
|
-
@translations.each_pair do |term,translation|
|
24
|
-
yield term, translation
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
private
|
29
|
-
def add_result(hash, key, value)
|
30
|
-
if hash.has_key?(key)
|
31
|
-
hash[key].push(value)
|
32
|
-
else
|
33
|
-
hash.merge!({ key => [value] })
|
34
|
-
end
|
35
|
-
self
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
1
|
+
# Objects of this class are returned by methods retrieving translations
|
2
|
+
# from Web dictionaries.
|
3
|
+
|
4
|
+
module Dict
  # Objects of this class are returned by methods retrieving translations
  # from Web dictionaries.
  #
  # translations and examples are hashes mapping a key to the array of
  # values added for it, in insertion order.
  class Result
    attr_reader :term, :translations, :examples

    # term - the looked-up word this result belongs to.
    def initialize(term)
      @term = term
      @translations = {}
      @examples = {}
    end

    # Appends translation to the list stored under term.
    #
    # Returns self, so calls can be chained.
    def add_translation(term, translation)
      add_result(@translations, term, translation)
    end

    # Appends example to the list stored under term.
    #
    # Returns self, so calls can be chained.
    def add_example(term, example)
      add_result(@examples, term, example)
    end

    # Yields every term together with the array of its translations.
    #
    # Returns the translations hash, or an Enumerator when no block is given.
    def each_translation
      return enum_for(:each_translation) unless block_given?

      @translations.each_pair { |term, translation| yield term, translation }
    end

    private

    # Appends value to the array kept under key, creating it on first use.
    def add_result(hash, key, value)
      (hash[key] ||= []) << value
      self
    end
  end
end
|
data/lib/dict/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
module Dict
|
2
|
-
VERSION = "0.3.
|
3
|
-
end
|
1
|
+
module Dict
  # Version of the dict gem; frozen so the constant cannot be mutated in place.
  VERSION = "0.3.4".freeze
end
|
data/lib/dict/wiktionary.rb
CHANGED
@@ -1,91 +1,91 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
|
3
|
-
# Class fetching translations of given word from wiktionary.org.
|
4
|
-
|
5
|
-
require 'nokogiri'
|
6
|
-
require 'dict/dictionary'
|
7
|
-
|
8
|
-
module Dict
|
9
|
-
class Wiktionary < Dictionary
|
10
|
-
|
11
|
-
# Returns an Dict::Result object.
|
12
|
-
|
13
|
-
translations.each do |translation|
|
14
|
-
@result.add_translation(@result.term, translation.gsub(/(\s[^|\s]+\|)/,' '))
|
15
|
-
examples(translation).each { |example| @result.add_example(translation, example) }
|
16
|
-
end
|
17
|
-
|
18
|
-
@result
|
19
|
-
end
|
20
|
-
|
21
|
-
def get_html(url)
|
22
|
-
begin
|
23
|
-
Nokogiri::HTML(open(URI.encode(url)))
|
24
|
-
rescue OpenURI::HTTPError
|
25
|
-
raise Dictionary::ConnectError
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
private
|
30
|
-
def polish?(content)
|
31
|
-
! /==Polish==/i.match(content).nil?
|
32
|
-
end
|
33
|
-
|
34
|
-
# Returns an array containing translations.
|
35
|
-
def translations
|
36
|
-
url_pl = "http://en.wiktionary.org/w/index.php?title=#{@word}&action=edit"
|
37
|
-
url_en = "http://pl.wiktionary.org/w/index.php?title=#{@word}&action=edit"
|
38
|
-
|
39
|
-
content_pl = get_html(url_pl).css('textarea#wpTextbox1').first
|
40
|
-
if polish?(content_pl)
|
41
|
-
@is_polish = true
|
42
|
-
extract_polish_translations(content_pl)
|
43
|
-
else
|
44
|
-
@is_polish = false
|
45
|
-
extract_english_translations(get_html(url_en).css('textarea#wpTextbox1').first.content)
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
# Returns an array containing polish translations.
|
50
|
-
def extract_polish_translations(content)
|
51
|
-
translations = /Noun[^\{]+\{\{(?:head\|pl|pl\-noun)[^#]+#\s*\[\[([^\n]+)/.match(content)
|
52
|
-
translations = (translations && translations[1].gsub(/\[|\]/,'').split(', ')) || []
|
53
|
-
end
|
54
|
-
|
55
|
-
# Returns an array containing english translations.
|
56
|
-
def extract_english_translations(content)
|
57
|
-
translations_block = /język\s+angielski(?:.|\n)+\{\{znaczenia\}\}(.|\n)+(?:\{\{odmiana){1,}/.match(content)
|
58
|
-
return [] unless translations_block.instance_of?(MatchData)
|
59
|
-
translations_block = translations_block[0].gsub(/odmiana(.|\n)+$/,'')
|
60
|
-
translations = translations_block.scan(/:\s*\(\d\.?\d?\)\s*([^\n]+)/)
|
61
|
-
translations.map! do |translation|
|
62
|
-
translation[0].gsub(/\[|\]|\{\{[^\}]+\}\}|'|<.*/,'').strip
|
63
|
-
end
|
64
|
-
translations.delete_if(&:empty?)
|
65
|
-
translations ||= []
|
66
|
-
end
|
67
|
-
|
68
|
-
def examples(word)
|
69
|
-
url_pl = "http://pl.wiktionary.org/w/index.php?title=#{word}&action=edit"
|
70
|
-
|
71
|
-
if @is_polish
|
72
|
-
extract_english_examples(word)
|
73
|
-
else
|
74
|
-
[]
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
# Returns an array containing usage examples of translated polish word to english.
|
79
|
-
def extract_english_examples(word)
|
80
|
-
word = word.gsub(/\s+\(.+$/,'') || ''
|
81
|
-
url_en = "http://en.wiktionary.org/w/index.php?title=#{word}&action=edit"
|
82
|
-
examples = /Noun[^\{]+\{\{en\-noun[^=]+/.match(get_html(url_en.gsub('{word}',word)).css('textarea#wpTextbox1').first)
|
83
|
-
return [] unless examples.instance_of?(MatchData)
|
84
|
-
examples = examples[0].scan(/#: ''([^\n]+)\n/)
|
85
|
-
examples.map! do |translation|
|
86
|
-
translation[0].gsub(/'{2,}/,'')
|
87
|
-
end
|
88
|
-
examples
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
# Class fetching translations of given word from wiktionary.org.
|
4
|
+
|
5
|
+
require 'nokogiri'
|
6
|
+
require 'dict/dictionary'
|
7
|
+
|
8
|
+
module Dict
  # Class fetching translations of given word from wiktionary.org.
  class Wiktionary < Dictionary

    # Adds every translation of @word to @result and, for each translation,
    # the usage examples found for it.
    #
    # Returns the Dict::Result object.
    def translate
      translations.each do |translation|
        @result.add_translation(@result.term, translation.gsub(/(\s[^|\s]+\|)/,' '))
        examples(translation).each { |example| @result.add_example(translation, example) }
      end

      @result
    end

    # Downloads the page at url and parses it.
    #
    # Returns the parsed Nokogiri HTML document.
    # Raises Dictionary::ConnectError when fetching ends with an HTTP error.
    def get_html(url)
      # URI.encode and URI-aware Kernel#open were removed in Ruby 3.0; escape
      # through the default parser and use URI#open (from open-uri) instead.
      # The block form closes the downloaded stream.
      address = URI.parse(URI::DEFAULT_PARSER.escape(url))
      address.open { |page| Nokogiri::HTML(page) }
    rescue OpenURI::HTTPError
      raise Dictionary::ConnectError
    end

    private

    # Returns true when content contains a "==Polish==" heading.
    def polish?(content)
      !/==Polish==/i.match(content).nil?
    end

    # Returns an array containing translations.
    #
    # Also remembers in @is_polish whether the english Wiktionary describes
    # @word as a polish word.
    def translations
      english_wiki_url = "http://en.wiktionary.org/w/index.php?title=#{@word}&action=edit"
      polish_wiki_url = "http://pl.wiktionary.org/w/index.php?title=#{@word}&action=edit"

      english_wiki_text = wikitext(english_wiki_url)
      @is_polish = polish?(english_wiki_text)

      if @is_polish
        extract_polish_translations(english_wiki_text)
      else
        extract_english_translations(wikitext(polish_wiki_url))
      end
    end

    # Returns the text of the textarea#wpTextbox1 element of the page at url,
    # or an empty string when the page has no such element.
    def wikitext(url)
      textarea = get_html(url).css('textarea#wpTextbox1').first
      textarea.nil? ? '' : textarea.content
    end

    # Returns an array containing polish translations.
    def extract_polish_translations(content)
      found = /Noun[^\{]+\{\{(?:head\|pl|pl\-noun)[^#]+#\s*\[\[([^\n]+)/.match(content)
      return [] if found.nil?

      # Drop the wiki link brackets, then split the comma separated list.
      found[1].gsub(/\[|\]/,'').split(', ')
    end

    # Returns an array containing english translations.
    def extract_english_translations(content)
      block = /język\s+angielski(?:.|\n)+\{\{znaczenia\}\}(.|\n)+(?:\{\{odmiana){1,}/.match(content)
      return [] if block.nil?

      meanings = block[0].gsub(/odmiana(.|\n)+$/,'')
      cleaned = meanings.scan(/:\s*\(\d\.?\d?\)\s*([^\n]+)/).map do |captures|
        # Strip link brackets, {{...}} templates, quotes and trailing markup.
        captures[0].gsub(/\[|\]|\{\{[^\}]+\}\}|'|<.*/,'').strip
      end
      cleaned.reject(&:empty?)
    end

    # Returns usage examples for word when the looked-up word was polish,
    # otherwise an empty array.
    def examples(word)
      @is_polish ? extract_english_examples(word) : []
    end

    # Returns an array containing usage examples of translated polish word to english.
    def extract_english_examples(word)
      # Cut a trailing parenthesised remark before building the page title.
      word = word.gsub(/\s+\(.+$/,'')
      url_en = "http://en.wiktionary.org/w/index.php?title=#{word}&action=edit"

      noun_section = /Noun[^\{]+\{\{en\-noun[^=]+/.match(wikitext(url_en))
      return [] if noun_section.nil?

      noun_section[0].scan(/#: ''([^\n]+)\n/).map { |captures| captures[0].gsub(/'{2,}/,'') }
    end
  end
end
|
data/lib/dict.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
require 'dict/dict'
|
2
|
-
|
3
|
-
module Dict
|
4
|
-
autoload :VERSION, "dict/version"
|
5
|
-
end
|
1
|
+
require 'dict/dict'
|
2
|
+
|
3
|
+
module Dict
|
4
|
+
autoload :VERSION, "dict/version"
|
5
|
+
end
|