cambridge_dict 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +10 -6
- data/lib/cambridge_dict/version.rb +1 -1
- data/lib/cambridge_dict.rb +22 -8
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f6ced5f541aa52f47a68c54450b7763bbf904a6ddb61287b9c0fe9affbf1e5c9
|
4
|
+
data.tar.gz: 3f9e664f60ad00d6afc9ff97bbc674133fa4a7deb1294d2f3c48e41b1e035ccb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 77dd6ddc26ec659636cfbc166acbea179be8cb80eeabebfbe4e125360d440cb7d97bd50ad04b6b4e943dd99bb318647112fa8d0019c132e4dd549d7d98dbaba9
|
7
|
+
data.tar.gz: 8b63b295e1dd76f3def1a961f0952b54ad831d46b79e65be92f5d0fd49573ae7f2ed8e962a27b4a5e72ce5ada3d1d99ebb24801b2b5cef8d95959fbcc87d0190
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Cambridge Dict
|
2
2
|
|
3
|
-
Look up words for
|
3
|
+
Look up words for Cambridge via code or web server.
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
@@ -12,13 +12,13 @@ gem install cambridge_dict
|
|
12
12
|
|
13
13
|
```
|
14
14
|
# throws error
|
15
|
-
|
15
|
+
result = CambridgeDict::Client.lookup! "hello", language: "en"
|
16
16
|
|
17
17
|
# returns nil if not found
|
18
|
-
|
18
|
+
result = CambridgeDict::Client.lookup "hello", language: "en"
|
19
19
|
|
20
20
|
# can also create instance
|
21
|
-
client =
|
21
|
+
client = CambridgeDict::Client.new
|
22
22
|
result = client.lookup("back", language: "en")
|
23
23
|
|
24
24
|
return if result.nil?
|
@@ -35,16 +35,17 @@ return if result.nil?
|
|
35
35
|
{:id=>5, :type=>"Past tense", :text=>"backed"},
|
36
36
|
{:id=>6, :type=>"Past participle", :text=>"backed"},
|
37
37
|
{:id=>7, :type=>"Present participle", :text=>"backing"}],
|
38
|
-
:
|
38
|
+
:pronunciations=>
|
39
39
|
[{:pos=>"adverb", :lang=>"us", :url=>"https://dictionary.cambridge.org/us/media/english/us_pron/b/bac/back_/back.mp3", :pron=>"/bæk/"},
|
40
40
|
{:pos=>"adverb", :lang=>"uk", :url=>"https://dictionary.cambridge.org/us/media/english/uk_pron/u/uka/ukazt/ukazt__024.mp3", :pron=>"/bæk/"},
|
41
41
|
{:pos=>"noun", :lang=>"us", :url=>"https://dictionary.cambridge.org/us/media/english/us_pron/b/bac/back_/back.mp3", :pron=>"/bæk/"},
|
42
42
|
{:pos=>"noun", :lang=>"uk", :url=>"https://dictionary.cambridge.org/us/media/english/uk_pron/u/uka/ukazt/ukazt__024.mp3", :pron=>"/bæk/"},
|
43
43
|
{:pos=>"verb", :lang=>"us", :url=>"https://dictionary.cambridge.org/us/media/english/us_pron/b/bac/back_/back.mp3", :pron=>"/bæk/"},
|
44
44
|
...
|
45
|
-
:
|
45
|
+
:definitions=>
|
46
46
|
[{:id=>0,
|
47
47
|
:pos=>"adverb",
|
48
|
+
:headword=>"back",
|
48
49
|
:source=>"cald4-us",
|
49
50
|
:text=>"in, into, or toward a previous place or condition, or an earlier time: ",
|
50
51
|
:translation=>nil,
|
@@ -59,6 +60,7 @@ return if result.nil?
|
|
59
60
|
{:id=>7, :text=>"This tradition dates back to the 16th century.", :translation=>nil}]},
|
60
61
|
{:id=>1,
|
61
62
|
:pos=>"adverb",
|
63
|
+
:headword=>"back",
|
62
64
|
:source=>"cald4-us",
|
63
65
|
:text=>"in return: ",
|
64
66
|
:translation=>nil,
|
@@ -75,6 +77,8 @@ require 'cambridge_dict/server'
|
|
75
77
|
run CambridgeDict::Server
|
76
78
|
|
77
79
|
rackup config.ru
|
80
|
+
|
81
|
+
# curl localhost:9292/api/dictionary/:language/:word
|
78
82
|
```
|
79
83
|
|
80
84
|
## Development
|
data/lib/cambridge_dict.rb
CHANGED
@@ -7,6 +7,8 @@ module CambridgeDict
|
|
7
7
|
class Error < StandardError; end
|
8
8
|
|
9
9
|
class Client
|
10
|
+
PARSER = URI::Parser.new
|
11
|
+
|
10
12
|
def initialize
|
11
13
|
@base_url = "https://dictionary.cambridge.org"
|
12
14
|
@wiki_base = "https://simple.wiktionary.org/wiki"
|
@@ -22,7 +24,8 @@ module CambridgeDict
|
|
22
24
|
|
23
25
|
def lookup(word, language: 'en')
|
24
26
|
nation, dict_lang = parse_language(language)
|
25
|
-
|
27
|
+
encoded_word = word.gsub /[\s']/, "-"
|
28
|
+
url = "#{@base_url}/#{nation}/dictionary/#{dict_lang}/#{encoded_word}"
|
26
29
|
|
27
30
|
response = HTTParty.get(url, {
|
28
31
|
headers: {"User-Agent" => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.3"},
|
@@ -30,7 +33,9 @@ module CambridgeDict
|
|
30
33
|
return nil unless response.success?
|
31
34
|
|
32
35
|
doc = Nokogiri::HTML(response.body)
|
33
|
-
|
36
|
+
|
37
|
+
encoded_word = PARSER.escape(word)
|
38
|
+
wiki_verbs = fetch_verbs("#{@wiki_base}/#{encoded_word}")
|
34
39
|
|
35
40
|
word = extract_word(doc)
|
36
41
|
|
@@ -40,8 +45,8 @@ module CambridgeDict
|
|
40
45
|
word: word,
|
41
46
|
pos: extract_pos(doc),
|
42
47
|
verbs: wiki_verbs,
|
43
|
-
|
44
|
-
|
48
|
+
pronunciations: extract_pronunciation(doc),
|
49
|
+
definitions: extract_definitions(doc)
|
45
50
|
}
|
46
51
|
end
|
47
52
|
|
@@ -73,7 +78,9 @@ module CambridgeDict
|
|
73
78
|
end
|
74
79
|
|
75
80
|
def fetch_verbs(url)
|
76
|
-
response = HTTParty.get(url
|
81
|
+
response = HTTParty.get(url, {
|
82
|
+
headers: {"User-Agent" => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.3"},
|
83
|
+
})
|
77
84
|
return [] unless response.success?
|
78
85
|
|
79
86
|
doc = Nokogiri::HTML(response.body)
|
@@ -92,7 +99,7 @@ module CambridgeDict
|
|
92
99
|
end
|
93
100
|
|
94
101
|
def extract_word(doc)
|
95
|
-
doc.at_css('.
|
102
|
+
doc.at_css('.superentry .tb')&.text
|
96
103
|
end
|
97
104
|
|
98
105
|
def extract_pos(doc)
|
@@ -139,13 +146,16 @@ module CambridgeDict
|
|
139
146
|
end
|
140
147
|
|
141
148
|
pos = entry&.at_css('.pos.dpos')&.text
|
149
|
+
headword = entry&.at_css('.headword .hw')&.text
|
150
|
+
text = clean_string block.at_css('.def.ddef_d.db')&.text&.strip
|
142
151
|
|
143
152
|
definition = {
|
144
153
|
id: index,
|
154
|
+
headword: headword,
|
145
155
|
pos: pos,
|
146
156
|
source: source || nil,
|
147
|
-
text:
|
148
|
-
translation: block.at_css('.def-body.ddef_b > span.trans.dtrans')&.text,
|
157
|
+
text: text,
|
158
|
+
translation: block.at_css('.def-body.ddef_b > span.trans.dtrans')&.text&.strip,
|
149
159
|
example: extract_examples(block)
|
150
160
|
}
|
151
161
|
|
@@ -168,6 +178,10 @@ module CambridgeDict
|
|
168
178
|
|
169
179
|
examples
|
170
180
|
end
|
181
|
+
|
182
|
+
def clean_string(input)
|
183
|
+
input&.gsub(/\s+/, ' ')&.strip
|
184
|
+
end
|
171
185
|
end
|
172
186
|
|
173
187
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cambridge_dict
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben D'Angelo
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-10-
|
11
|
+
date: 2024-10-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|