cambridge_dict 0.2.0 → 0.2.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 294c128a929285511c42b782899cdf08045b3cc3c6034bf89fbabb5ca6cfed0b
4
- data.tar.gz: ca2757faaaf9658699190234a391c6f69208a30e7f022e8d0f9cb0139f2101fc
3
+ metadata.gz: 8c7205facd81e402d36a810dc1b595547207b0f6e3168193fd4bcb56b5c2f9ee
4
+ data.tar.gz: 29adecbe8c3d0f96e59f8b43ff1b34fc093291af9a2649568ad716104aa78792
5
5
  SHA512:
6
- metadata.gz: 7f227d3434d68a7988df29fca488fa3952bf6c84a2455246e858888fdde7e22a14959061d8ce1dc346f610a4e00e48c8dd29cd4a5b7094418457bd2373f38eef
7
- data.tar.gz: b15a564112405d640aab362d816d66e4fa724fcc7ea40f51acf52e0a7e56147913fab901ad16eaf1c42e586b236a59ef8250a13d448f3a6ebe077f95ea025319
6
+ metadata.gz: 7dfa104d86a45b96b7dac3b7a97beb99c14f662ddc25b58b2e58e2e262d82f8730458b39e5e6ffb2aca9824188bb531e54bbcd38f0841e4777c561777269c13c
7
+ data.tar.gz: 2e8f2f7f5ceac4f88568b7752d950add874c99472671b5ff04e86c3330042de34774462eb6a42a0833b1d252ed808619c455a5dc24d80dca82b42e324c629673
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Cambridge Dict
2
2
 
3
- Look up words for cambridge via code or web server.
3
+ Look up words for Cambridge via code or web server.
4
4
 
5
5
  ## Installation
6
6
 
@@ -35,16 +35,17 @@ return if result.nil?
35
35
  {:id=>5, :type=>"Past tense", :text=>"backed"},
36
36
  {:id=>6, :type=>"Past participle", :text=>"backed"},
37
37
  {:id=>7, :type=>"Present participle", :text=>"backing"}],
38
- :pronunciation=>
38
+ :pronunciations=>
39
39
  [{:pos=>"adverb", :lang=>"us", :url=>"https://dictionary.cambridge.org/us/media/english/us_pron/b/bac/back_/back.mp3", :pron=>"/bæk/"},
40
40
  {:pos=>"adverb", :lang=>"uk", :url=>"https://dictionary.cambridge.org/us/media/english/uk_pron/u/uka/ukazt/ukazt__024.mp3", :pron=>"/bæk/"},
41
41
  {:pos=>"noun", :lang=>"us", :url=>"https://dictionary.cambridge.org/us/media/english/us_pron/b/bac/back_/back.mp3", :pron=>"/bæk/"},
42
42
  {:pos=>"noun", :lang=>"uk", :url=>"https://dictionary.cambridge.org/us/media/english/uk_pron/u/uka/ukazt/ukazt__024.mp3", :pron=>"/bæk/"},
43
43
  {:pos=>"verb", :lang=>"us", :url=>"https://dictionary.cambridge.org/us/media/english/us_pron/b/bac/back_/back.mp3", :pron=>"/bæk/"},
44
44
  ...
45
- :definition=>
45
+ :definitions=>
46
46
  [{:id=>0,
47
47
  :pos=>"adverb",
48
+ :headword=>"back",
48
49
  :source=>"cald4-us",
49
50
  :text=>"in, into, or toward a previous place or condition, or an earlier time: ",
50
51
  :translation=>nil,
@@ -59,6 +60,7 @@ return if result.nil?
59
60
  {:id=>7, :text=>"This tradition dates back to the 16th century.", :translation=>nil}]},
60
61
  {:id=>1,
61
62
  :pos=>"adverb",
63
+ :headword=>"back",
62
64
  :source=>"cald4-us",
63
65
  :text=>"in return: ",
64
66
  :translation=>nil,
@@ -67,11 +69,16 @@ return if result.nil?
67
69
  ## Webserver Setup
68
70
 
69
71
  ```
72
+ # add sinatra and sinatra-cors to gemfile
73
+
70
74
  # config.ru
71
- require 'cambridge_dictionary'
72
- run CambridgeDictionary::API
75
+ require 'cambridge_dict'
76
+ require 'cambridge_dict/server'
77
+ run CambridgeDict::Server
78
+
79
+ rackup config.ru
73
80
 
74
- ruby config.ru
81
+ # curl localhost:9292/api/dictionary/:language/:word
75
82
  ```
76
83
 
77
84
  ## Development
data/config.ru CHANGED
@@ -1,2 +1,3 @@
1
1
  require './lib/cambridge_dict'
2
- run CambridgeDictionary::API
2
+ require './lib/cambridge_dict/server'
3
+ run CambridgeDict::Server
@@ -0,0 +1,29 @@
1
+ require 'sinatra'
2
+ require 'sinatra/cors'
3
+
4
+ module CambridgeDict
5
+ class Server < Sinatra::Base
6
+ register Sinatra::Cors
7
+
8
+ set :allow_origin, "*"
9
+ set :allow_methods, "GET,HEAD,POST"
10
+ set :allow_headers, "content-type,if-modified-since"
11
+ set :expose_headers, "location,link"
12
+
13
+ client = Client.new
14
+
15
+ get '/api/dictionary/:language/:entry' do
16
+ content_type :json
17
+
18
+ result = client.lookup(params[:entry], language: params[:language])
19
+
20
+ if result && !result[:word].empty?
21
+ status 200
22
+ result.to_json
23
+ else
24
+ status 404
25
+ {error: "word not found"}.to_json
26
+ end
27
+ end
28
+ end
29
+ end
@@ -1,3 +1,3 @@
1
1
  module CambridgeDict
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.2"
3
3
  end
@@ -1,13 +1,14 @@
1
1
  require 'nokogiri'
2
2
  require 'httparty'
3
- require 'sinatra'
4
- require 'sinatra/cors'
5
- require 'cambridge_dict/version'
3
+
4
+ require_relative 'cambridge_dict/version'
6
5
 
7
6
  module CambridgeDict
8
7
  class Error < StandardError; end
9
8
 
10
9
  class Client
10
+ PARSER = URI::Parser.new
11
+
11
12
  def initialize
12
13
  @base_url = "https://dictionary.cambridge.org"
13
14
  @wiki_base = "https://simple.wiktionary.org/wiki"
@@ -23,7 +24,8 @@ module CambridgeDict
23
24
 
24
25
  def lookup(word, language: 'en')
25
26
  nation, dict_lang = parse_language(language)
26
- url = "#{@base_url}/#{nation}/dictionary/#{dict_lang}/#{word}"
27
+ encoded_word = word.gsub /[\s']/, "-"
28
+ url = "#{@base_url}/#{nation}/dictionary/#{dict_lang}/#{encoded_word}"
27
29
 
28
30
  response = HTTParty.get(url, {
29
31
  headers: {"User-Agent" => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.3"},
@@ -31,7 +33,9 @@ module CambridgeDict
31
33
  return nil unless response.success?
32
34
 
33
35
  doc = Nokogiri::HTML(response.body)
34
- wiki_verbs = fetch_verbs("#{@wiki_base}/#{word}")
36
+
37
+ encoded_word = PARSER.escape(word)
38
+ wiki_verbs = fetch_verbs("#{@wiki_base}/#{encoded_word}")
35
39
 
36
40
  word = extract_word(doc)
37
41
 
@@ -41,8 +45,8 @@ module CambridgeDict
41
45
  word: word,
42
46
  pos: extract_pos(doc),
43
47
  verbs: wiki_verbs,
44
- pronunciation: extract_pronunciation(doc),
45
- definition: extract_definitions(doc)
48
+ pronunciations: extract_pronunciation(doc),
49
+ definitions: extract_definitions(doc)
46
50
  }
47
51
  end
48
52
 
@@ -74,7 +78,9 @@ module CambridgeDict
74
78
  end
75
79
 
76
80
  def fetch_verbs(url)
77
- response = HTTParty.get(url)
81
+ response = HTTParty.get(url, {
82
+ headers: {"User-Agent" => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.3"},
83
+ })
78
84
  return [] unless response.success?
79
85
 
80
86
  doc = Nokogiri::HTML(response.body)
@@ -93,7 +99,7 @@ module CambridgeDict
93
99
  end
94
100
 
95
101
  def extract_word(doc)
96
- doc.at_css('.hw.dhw')&.text
102
+ doc.at_css('.superentry .tb')&.text
97
103
  end
98
104
 
99
105
  def extract_pos(doc)
@@ -140,13 +146,15 @@ module CambridgeDict
140
146
  end
141
147
 
142
148
  pos = entry&.at_css('.pos.dpos')&.text
149
+ headword = entry&.at_css('.headword .hw')&.text
143
150
 
144
151
  definition = {
145
152
  id: index,
153
+ headword: headword,
146
154
  pos: pos,
147
155
  source: source || nil,
148
- text: block.at_css('.def.ddef_d.db')&.text,
149
- translation: block.at_css('.def-body.ddef_b > span.trans.dtrans')&.text,
156
+ text: block.at_css('.def.ddef_d.db')&.text&.strip,
157
+ translation: block.at_css('.def-body.ddef_b > span.trans.dtrans')&.text&.strip,
150
158
  example: extract_examples(block)
151
159
  }
152
160
 
@@ -171,28 +179,4 @@ module CambridgeDict
171
179
  end
172
180
  end
173
181
 
174
- class API < Sinatra::Base
175
- register Sinatra::Cors
176
-
177
- set :allow_origin, "*"
178
- set :allow_methods, "GET,HEAD,POST"
179
- set :allow_headers, "content-type,if-modified-since"
180
- set :expose_headers, "location,link"
181
-
182
- client = Client.new
183
-
184
- get '/api/dictionary/:language/:entry' do
185
- content_type :json
186
-
187
- result = client.lookup(params[:entry], language: params[:language])
188
-
189
- if result && !result[:word].empty?
190
- status 200
191
- result.to_json
192
- else
193
- status 404
194
- {error: "word not found"}.to_json
195
- end
196
- end
197
- end
198
182
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cambridge_dict
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben D'Angelo
@@ -14,58 +14,30 @@ dependencies:
14
14
  name: nokogiri
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.15'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.15'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: httparty
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
32
- - !ruby/object:Gem::Version
33
- version: '0.21'
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - "~>"
39
- - !ruby/object:Gem::Version
40
- version: '0.21'
41
- - !ruby/object:Gem::Dependency
42
- name: sinatra
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - "~>"
31
+ - - ">="
46
32
  - !ruby/object:Gem::Version
47
- version: '3.1'
33
+ version: 0.1.0
48
34
  type: :runtime
49
35
  prerelease: false
50
36
  version_requirements: !ruby/object:Gem::Requirement
51
37
  requirements:
52
- - - "~>"
53
- - !ruby/object:Gem::Version
54
- version: '3.1'
55
- - !ruby/object:Gem::Dependency
56
- name: sinatra-cors
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - "~>"
60
- - !ruby/object:Gem::Version
61
- version: '1.2'
62
- type: :runtime
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - "~>"
38
+ - - ">="
67
39
  - !ruby/object:Gem::Version
68
- version: '1.2'
40
+ version: 0.1.0
69
41
  - !ruby/object:Gem::Dependency
70
42
  name: rspec
71
43
  requirement: !ruby/object:Gem::Requirement
@@ -119,6 +91,7 @@ files:
119
91
  - Rakefile
120
92
  - config.ru
121
93
  - lib/cambridge_dict.rb
94
+ - lib/cambridge_dict/server.rb
122
95
  - lib/cambridge_dict/version.rb
123
96
  homepage: https://codeberg.org/bendangelo/cambridge_dict
124
97
  licenses: