google_web_translate 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/Gemfile +3 -0
- data/data/urls.txt +122 -0
- data/google-web-translate.gemspec +31 -30
- data/lib/google_web_translate.rb +6 -5
- data/lib/google_web_translate/api.rb +142 -138
- data/lib/google_web_translate/cli.rb +18 -18
- data/lib/google_web_translate/http_client.rb +26 -26
- data/lib/google_web_translate/result.rb +54 -54
- data/lib/google_web_translate/server_list.rb +146 -0
- data/lib/google_web_translate/version.rb +3 -3
- metadata +18 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4fa88abae6fc3035a666db53549211dd084bbaaf
|
4
|
+
data.tar.gz: 4942b2dd7b155f4507d8fd1b2c902c7e66074e98
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 857468dc26b8538e2be64454b1b1db9dc65ada47a53d63766c351e813ff9a5010e1759333aa0a80b194faae9adf26b895ef69aaf63c524398467016f2a04e394
|
7
|
+
data.tar.gz: e37080f63adfbf5e0464b2e9f8340f7b22160e25ba36b736dd6e16404fb13e92359f80f4b9e6a3485ea1c9a1b4176dd7dc55f0360949637aae210f6a552b9287
|
data/.gitignore
CHANGED
data/Gemfile
CHANGED
@@ -17,5 +17,8 @@ end
|
|
17
17
|
optional_gem 'therubyracer', platform: :ruby
|
18
18
|
optional_gem 'therubyrhino', platform: :jruby
|
19
19
|
|
20
|
+
# optional c extensions
|
21
|
+
optional_gem 'concurrent-ruby-ext', platform: :ruby
|
22
|
+
|
20
23
|
# Specify your gem's dependencies in google-web-translate.gemspec
|
21
24
|
gemspec
|
data/data/urls.txt
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
google.ae
|
2
|
+
google.am
|
3
|
+
google.as
|
4
|
+
google.at
|
5
|
+
google.at
|
6
|
+
google.az
|
7
|
+
google.ba
|
8
|
+
google.be
|
9
|
+
google.bg
|
10
|
+
google.ca
|
11
|
+
google.cd
|
12
|
+
google.ch
|
13
|
+
google.ci
|
14
|
+
google.cl
|
15
|
+
google.cn
|
16
|
+
google.co.cr
|
17
|
+
google.co.id
|
18
|
+
google.co.il
|
19
|
+
google.co.in
|
20
|
+
google.co.jp
|
21
|
+
google.co.ke
|
22
|
+
google.co.kr
|
23
|
+
google.com
|
24
|
+
google.co.ma
|
25
|
+
google.com.ai
|
26
|
+
google.com.ar
|
27
|
+
google.com.au
|
28
|
+
google.com.bd
|
29
|
+
google.com.bh
|
30
|
+
google.com.bn
|
31
|
+
google.com.bo
|
32
|
+
google.com.br
|
33
|
+
google.com.co
|
34
|
+
google.com.cu
|
35
|
+
google.com.do
|
36
|
+
google.com.ec
|
37
|
+
google.com.eg
|
38
|
+
google.com.et
|
39
|
+
google.com.gi
|
40
|
+
google.com.gt
|
41
|
+
google.com.hk
|
42
|
+
google.com.jm
|
43
|
+
google.com.kh
|
44
|
+
google.com.ly
|
45
|
+
google.com.mt
|
46
|
+
google.com.mx
|
47
|
+
google.com.my
|
48
|
+
google.com.na
|
49
|
+
google.com.ng
|
50
|
+
google.com.ni
|
51
|
+
google.com.om
|
52
|
+
google.com.pa
|
53
|
+
google.com.pe
|
54
|
+
google.com.ph
|
55
|
+
google.com.pk
|
56
|
+
google.com.pr
|
57
|
+
google.com.py
|
58
|
+
google.com.qa
|
59
|
+
google.com.sa
|
60
|
+
google.com.sg
|
61
|
+
google.com.sv
|
62
|
+
google.com.tr
|
63
|
+
google.com.tw
|
64
|
+
google.com.ua
|
65
|
+
google.com.uy
|
66
|
+
google.com.vc
|
67
|
+
google.com.vn
|
68
|
+
google.co.nz
|
69
|
+
google.co.th
|
70
|
+
google.co.ug
|
71
|
+
google.co.uk
|
72
|
+
google.co.uz
|
73
|
+
google.co.ve
|
74
|
+
google.co.za
|
75
|
+
google.co.zm
|
76
|
+
google.cz
|
77
|
+
google.de
|
78
|
+
google.dj
|
79
|
+
google.dk
|
80
|
+
google.ee
|
81
|
+
google.es
|
82
|
+
google.fi
|
83
|
+
google.fr
|
84
|
+
google.ge
|
85
|
+
google.gm
|
86
|
+
google.gp
|
87
|
+
google.gr
|
88
|
+
google.hn
|
89
|
+
google.hr
|
90
|
+
google.ht
|
91
|
+
google.hu
|
92
|
+
google.ie
|
93
|
+
google.is
|
94
|
+
google.it
|
95
|
+
google.je
|
96
|
+
google.jo
|
97
|
+
google.kz
|
98
|
+
google.li
|
99
|
+
google.lk
|
100
|
+
google.lt
|
101
|
+
google.lu
|
102
|
+
google.lv
|
103
|
+
google.ma
|
104
|
+
google.md
|
105
|
+
google.mn
|
106
|
+
google.mu
|
107
|
+
google.nl
|
108
|
+
google.no
|
109
|
+
google.nu
|
110
|
+
google.pl
|
111
|
+
google.pl
|
112
|
+
google.pt
|
113
|
+
google.ro
|
114
|
+
google.ru
|
115
|
+
google.rw
|
116
|
+
google.se
|
117
|
+
google.si
|
118
|
+
google.sk
|
119
|
+
google.sm
|
120
|
+
google.sn
|
121
|
+
google.to
|
122
|
+
google.tt
|
@@ -1,30 +1,31 @@
|
|
1
|
-
|
2
|
-
lib = File.expand_path('../lib', __FILE__)
|
3
|
-
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require 'google_web_translate/version'
|
5
|
-
|
6
|
-
Gem::Specification.new do |spec|
|
7
|
-
spec.name = 'google_web_translate'
|
8
|
-
spec.version = GoogleWebTranslate::VERSION
|
9
|
-
spec.authors = ['Andrew']
|
10
|
-
spec.email = ['sobakasu@gmail.com']
|
11
|
-
|
12
|
-
spec.summary = 'Text translation using the google web interface'
|
13
|
-
spec.homepage = 'https://github.com/sobakasu/google_web_translate'
|
14
|
-
spec.license = 'MIT'
|
15
|
-
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
16
|
-
f.match(%r{^(test|spec|features)/})
|
17
|
-
end
|
18
|
-
spec.bindir = 'bin'
|
19
|
-
spec.executables = %w[google_web_translate]
|
20
|
-
spec.require_paths = ['lib']
|
21
|
-
|
22
|
-
spec.add_development_dependency 'bundler', '~> 1.16'
|
23
|
-
spec.add_development_dependency 'rake', '~> 10.0'
|
24
|
-
spec.add_development_dependency 'rspec', '~> 3.0'
|
25
|
-
spec.add_development_dependency 'simplecov'
|
26
|
-
spec.add_development_dependency 'webmock'
|
27
|
-
|
28
|
-
spec.add_dependency '
|
29
|
-
spec.add_dependency '
|
30
|
-
|
1
|
+
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'google_web_translate/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = 'google_web_translate'
|
8
|
+
spec.version = GoogleWebTranslate::VERSION
|
9
|
+
spec.authors = ['Andrew']
|
10
|
+
spec.email = ['sobakasu@gmail.com']
|
11
|
+
|
12
|
+
spec.summary = 'Text translation using the google web interface'
|
13
|
+
spec.homepage = 'https://github.com/sobakasu/google_web_translate'
|
14
|
+
spec.license = 'MIT'
|
15
|
+
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
16
|
+
f.match(%r{^(test|spec|features)/})
|
17
|
+
end
|
18
|
+
spec.bindir = 'bin'
|
19
|
+
spec.executables = %w[google_web_translate]
|
20
|
+
spec.require_paths = ['lib']
|
21
|
+
|
22
|
+
spec.add_development_dependency 'bundler', '~> 1.16'
|
23
|
+
spec.add_development_dependency 'rake', '~> 10.0'
|
24
|
+
spec.add_development_dependency 'rspec', '~> 3.0'
|
25
|
+
spec.add_development_dependency 'simplecov'
|
26
|
+
spec.add_development_dependency 'webmock'
|
27
|
+
|
28
|
+
spec.add_dependency 'concurrent-ruby'
|
29
|
+
spec.add_dependency 'execjs'
|
30
|
+
spec.add_dependency 'thor'
|
31
|
+
end
|
data/lib/google_web_translate.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
require 'google_web_translate/version.rb'
|
2
|
-
require 'google_web_translate/
|
3
|
-
require 'google_web_translate/
|
4
|
-
require 'google_web_translate/
|
5
|
-
require 'google_web_translate/
|
1
|
+
require 'google_web_translate/version.rb'
|
2
|
+
require 'google_web_translate/server_list.rb'
|
3
|
+
require 'google_web_translate/string_escaping.rb'
|
4
|
+
require 'google_web_translate/http_client.rb'
|
5
|
+
require 'google_web_translate/result.rb'
|
6
|
+
require 'google_web_translate/api.rb'
|
@@ -1,138 +1,142 @@
|
|
1
|
-
require 'execjs'
|
2
|
-
require 'json'
|
3
|
-
|
4
|
-
module GoogleWebTranslate
|
5
|
-
# interface to the google web translation api
|
6
|
-
class API
|
7
|
-
def initialize(options = {})
|
8
|
-
@dt = options[:dt] || DEFAULT_DT
|
9
|
-
@token_ttl = options[:token_ttl] || DEFAULT_TOKEN_TTL
|
10
|
-
@debug = options[:debug]
|
11
|
-
@http_client = options[:http_client] || HTTPClient.new(options)
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
html
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
end
|
42
|
-
|
43
|
-
def
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
@
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
debug("
|
113
|
-
tkk
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
end
|
133
|
-
|
134
|
-
def debug
|
135
|
-
|
136
|
-
end
|
137
|
-
|
138
|
-
|
1
|
+
require 'execjs'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
module GoogleWebTranslate
|
5
|
+
# interface to the google web translation api
|
6
|
+
class API
|
7
|
+
def initialize(options = {})
|
8
|
+
@dt = options[:dt] || DEFAULT_DT
|
9
|
+
@token_ttl = options[:token_ttl] || DEFAULT_TOKEN_TTL
|
10
|
+
@debug = options[:debug]
|
11
|
+
@http_client = options[:http_client] || HTTPClient.new(options)
|
12
|
+
@rate_limit = options[:rate_limit] || DEFAULT_RATE_LIMIT
|
13
|
+
end
|
14
|
+
|
15
|
+
def translate(string, from, to)
|
16
|
+
data = fetch_translation(string, from, to)
|
17
|
+
Result.new(data)
|
18
|
+
end
|
19
|
+
|
20
|
+
def languages
|
21
|
+
@languages ||= begin
|
22
|
+
html = fetch_main
|
23
|
+
html.scan(/\['(\w{2})','(\w{2})'\]/).flatten.uniq.sort
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
private
|
28
|
+
|
29
|
+
URL_MAIN = 'https://translate.google.com'.freeze
|
30
|
+
TRANSLATE_PATH = '/translate_a/single'.freeze
|
31
|
+
DEFAULT_DT = %w[at bd ex ld md qca rw rm ss t].freeze
|
32
|
+
DEFAULT_TOKEN_TTL = 3600
|
33
|
+
DEFAULT_RATE_LIMIT = 5
|
34
|
+
|
35
|
+
def fetch_translation(string, from, to)
|
36
|
+
server = ServerList.next_server(@rate_limit)
|
37
|
+
json = fetch_url_body(translate_url(server, string, from, to))
|
38
|
+
# File.write("response.json", json) if debug?
|
39
|
+
debug("response: #{json}")
|
40
|
+
JSON.parse(json)
|
41
|
+
end
|
42
|
+
|
43
|
+
def fetch_url_response(url)
|
44
|
+
@http_client.get(url.to_s)
|
45
|
+
end
|
46
|
+
|
47
|
+
def fetch_url_body(url)
|
48
|
+
uri = URI.parse(url)
|
49
|
+
uri = URI.join(URL_MAIN, url) if uri.relative?
|
50
|
+
debug("fetch #{uri}")
|
51
|
+
response = fetch_url_response(uri)
|
52
|
+
response.body
|
53
|
+
end
|
54
|
+
|
55
|
+
def valid_token?
|
56
|
+
@token_updated_at && Time.now - @token_updated_at < @token_ttl
|
57
|
+
end
|
58
|
+
|
59
|
+
def fetch_main(options = {})
|
60
|
+
@html = nil if options[:no_cache]
|
61
|
+
@html ||= fetch_url_body(URL_MAIN)
|
62
|
+
end
|
63
|
+
|
64
|
+
def fetch_desktop_module(html)
|
65
|
+
html =~ /([^="]*desktop_module_main.js)/
|
66
|
+
url = Regexp.last_match(1)
|
67
|
+
raise 'unable to find desktop module' unless url
|
68
|
+
fetch_url_body(url)
|
69
|
+
end
|
70
|
+
|
71
|
+
def munge_module(js)
|
72
|
+
js.gsub(/((?:var\s+)?\w+\s*=\s*\w+\.createElement.*?;)/) do |_i|
|
73
|
+
'return "";'
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
def compile_js(html)
|
78
|
+
desktop_module_js = munge_module(fetch_desktop_module(html))
|
79
|
+
window_js = File.read(File.join(__dir__, '..', 'js', 'window.js'))
|
80
|
+
js = window_js + desktop_module_js
|
81
|
+
# File.write('generated.js', js) if debug?
|
82
|
+
ExecJS.compile(js)
|
83
|
+
end
|
84
|
+
|
85
|
+
def update_token
|
86
|
+
# download main page
|
87
|
+
html = fetch_main(no_cache: true)
|
88
|
+
# extract tkk from html
|
89
|
+
@tkk = extract_tkk(html)
|
90
|
+
# compile desktop module javascript
|
91
|
+
@js_context = compile_js(html)
|
92
|
+
@token_updated_at = Time.now
|
93
|
+
end
|
94
|
+
|
95
|
+
def tk(string)
|
96
|
+
update_token unless valid_token?
|
97
|
+
@js_context.call('setWindowProperty', 'TKK', @tkk)
|
98
|
+
# tk = @js_context.call("wq", string)
|
99
|
+
tk = @js_context.call('generateToken', string, @tkk)
|
100
|
+
(tk.split('=') || [])[1]
|
101
|
+
end
|
102
|
+
|
103
|
+
def tk_js
|
104
|
+
File.read(File.join(__dir__, 'google_web.js'))
|
105
|
+
end
|
106
|
+
|
107
|
+
def extract_tkk(html)
|
108
|
+
raise 'TKK not found' unless html =~ /TKK=eval\('(.*?)'\);/
|
109
|
+
tkk_code = Regexp.last_match(1)
|
110
|
+
# tkk_code = Translatomatic::StringEscaping.unescape(tkk_code)
|
111
|
+
tkk_code = StringEscaping.unescape(tkk_code)
|
112
|
+
debug("tkk code unescaped: #{tkk_code}")
|
113
|
+
tkk = ExecJS.eval(tkk_code)
|
114
|
+
# tkk = context.call(nil)
|
115
|
+
debug("evaluated tkk: #{tkk}")
|
116
|
+
tkk
|
117
|
+
end
|
118
|
+
|
119
|
+
def translate_url(server, string, from, to)
|
120
|
+
tk = tk(string)
|
121
|
+
debug("tk: #{tk}")
|
122
|
+
query = {
|
123
|
+
sl: from, tl: to, ie: 'UTF-8', oe: 'UTF-8',
|
124
|
+
q: string, dt: @dt, tk: tk,
|
125
|
+
# not sure what these are for
|
126
|
+
client: 't', hl: 'en', otf: 1, ssel: 4, tsel: 6, kc: 5
|
127
|
+
}
|
128
|
+
url = "https://#{server.host}" + TRANSLATE_PATH
|
129
|
+
uri = URI.parse(url)
|
130
|
+
uri.query = URI.encode_www_form(query)
|
131
|
+
uri.to_s
|
132
|
+
end
|
133
|
+
|
134
|
+
def debug(msg)
|
135
|
+
puts msg if debug?
|
136
|
+
end
|
137
|
+
|
138
|
+
def debug?
|
139
|
+
@debug
|
140
|
+
end
|
141
|
+
end
|
142
|
+
end
|
@@ -1,18 +1,18 @@
|
|
1
|
-
require 'thor'
|
2
|
-
require 'pp'
|
3
|
-
|
4
|
-
module GoogleWebTranslate
|
5
|
-
# Command line interface
|
6
|
-
class CLI < Thor
|
7
|
-
desc 'string from to', 'translate a string from one language to another'
|
8
|
-
method_option :dt, type: :array, desc: 'data types'
|
9
|
-
def translate(string, from, to)
|
10
|
-
api_options = { debug: ENV['DEBUG'] }
|
11
|
-
api_options[:dt] = options[:dt] if options[:dt]
|
12
|
-
|
13
|
-
api = API.new(api_options)
|
14
|
-
result = api.translate(string, from, to)
|
15
|
-
pp result.to_h
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
1
|
+
require 'thor'
|
2
|
+
require 'pp'
|
3
|
+
|
4
|
+
module GoogleWebTranslate
|
5
|
+
# Command line interface
|
6
|
+
class CLI < Thor
|
7
|
+
desc 'string from to', 'translate a string from one language to another'
|
8
|
+
method_option :dt, type: :array, desc: 'data types'
|
9
|
+
def translate(string, from, to)
|
10
|
+
api_options = { debug: ENV['DEBUG'] }
|
11
|
+
api_options[:dt] = options[:dt] if options[:dt]
|
12
|
+
|
13
|
+
api = API.new(api_options)
|
14
|
+
result = api.translate(string, from, to)
|
15
|
+
pp result.to_h
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -1,26 +1,26 @@
|
|
1
|
-
require 'net/http'
|
2
|
-
|
3
|
-
module GoogleWebTranslate
|
4
|
-
# HTTP client functionality
|
5
|
-
class HTTPClient
|
6
|
-
def self.user_agent
|
7
|
-
gem_version = "GoogleWebTranslate/#{VERSION}"
|
8
|
-
platform_version = "(#{RUBY_PLATFORM}) #{RUBY_ENGINE}/#{RUBY_VERSION}"
|
9
|
-
gem_version + ' ' + platform_version
|
10
|
-
end
|
11
|
-
|
12
|
-
def initialize(options = {})
|
13
|
-
@user_agent = options[:user_agent] || self.class.user_agent
|
14
|
-
end
|
15
|
-
|
16
|
-
def get(url)
|
17
|
-
uri = URI.parse(url)
|
18
|
-
request = Net::HTTP::Get.new(uri)
|
19
|
-
request['User-Agent'] = @user_agent
|
20
|
-
options = { use_ssl: uri.scheme == 'https' }
|
21
|
-
Net::HTTP.start(uri.host, uri.port, options) do |http|
|
22
|
-
http.request(request)
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
1
|
+
require 'net/http'
|
2
|
+
|
3
|
+
module GoogleWebTranslate
|
4
|
+
# HTTP client functionality
|
5
|
+
class HTTPClient
|
6
|
+
def self.user_agent
|
7
|
+
gem_version = "GoogleWebTranslate/#{VERSION}"
|
8
|
+
platform_version = "(#{RUBY_PLATFORM}) #{RUBY_ENGINE}/#{RUBY_VERSION}"
|
9
|
+
gem_version + ' ' + platform_version
|
10
|
+
end
|
11
|
+
|
12
|
+
def initialize(options = {})
|
13
|
+
@user_agent = options[:user_agent] || self.class.user_agent
|
14
|
+
end
|
15
|
+
|
16
|
+
def get(url)
|
17
|
+
uri = URI.parse(url)
|
18
|
+
request = Net::HTTP::Get.new(uri)
|
19
|
+
request['User-Agent'] = @user_agent
|
20
|
+
options = { use_ssl: uri.scheme == 'https' }
|
21
|
+
Net::HTTP.start(uri.host, uri.port, options) do |http|
|
22
|
+
http.request(request)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -1,54 +1,54 @@
|
|
1
|
-
module GoogleWebTranslate
|
2
|
-
# Translation results
|
3
|
-
class Result
|
4
|
-
attr_reader :raw
|
5
|
-
|
6
|
-
# @private
|
7
|
-
DATA_INDICES = {
|
8
|
-
translation: [0, 0, 0], # dt:t
|
9
|
-
alternatives: [5, 0, 2], # dt:at
|
10
|
-
dictionary: [1], # dt: bd
|
11
|
-
synonyms: [11], # dt:ss
|
12
|
-
definitions: [12, 0], # dt:md
|
13
|
-
examples: [13, 0], # dt:ex
|
14
|
-
see_also: [14, 0], # dt:rw
|
15
|
-
}.freeze
|
16
|
-
|
17
|
-
DATA_INDICES.each_key { |key| attr_reader key }
|
18
|
-
|
19
|
-
def initialize(data)
|
20
|
-
@raw = data
|
21
|
-
@keys = []
|
22
|
-
@properties = {}
|
23
|
-
|
24
|
-
DATA_INDICES.each do |key, indices|
|
25
|
-
indices = indices.dup
|
26
|
-
extract_data(key, *indices)
|
27
|
-
end
|
28
|
-
|
29
|
-
@alternatives = @alternatives.collect { |i| i[0] } if @alternatives
|
30
|
-
@keys.each { |key| @properties[key] = instance_variable_get("@#{key}") }
|
31
|
-
end
|
32
|
-
|
33
|
-
def to_h
|
34
|
-
@properties
|
35
|
-
end
|
36
|
-
|
37
|
-
private
|
38
|
-
|
39
|
-
def extract_data(name, *indices)
|
40
|
-
value = array_value(@raw, *indices)
|
41
|
-
return if value.nil?
|
42
|
-
instance_variable_set("@#{name}", value)
|
43
|
-
@keys.push(name)
|
44
|
-
end
|
45
|
-
|
46
|
-
def array_value(array, *indices)
|
47
|
-
return nil if array.nil?
|
48
|
-
index = indices.shift
|
49
|
-
value = array[index]
|
50
|
-
return value if indices.empty?
|
51
|
-
array_value(value, *indices)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
1
|
+
module GoogleWebTranslate
|
2
|
+
# Translation results
|
3
|
+
class Result
|
4
|
+
attr_reader :raw
|
5
|
+
|
6
|
+
# @private
|
7
|
+
DATA_INDICES = {
|
8
|
+
translation: [0, 0, 0], # dt:t
|
9
|
+
alternatives: [5, 0, 2], # dt:at
|
10
|
+
dictionary: [1], # dt: bd
|
11
|
+
synonyms: [11], # dt:ss
|
12
|
+
definitions: [12, 0], # dt:md
|
13
|
+
examples: [13, 0], # dt:ex
|
14
|
+
see_also: [14, 0], # dt:rw
|
15
|
+
}.freeze
|
16
|
+
|
17
|
+
DATA_INDICES.each_key { |key| attr_reader key }
|
18
|
+
|
19
|
+
def initialize(data)
|
20
|
+
@raw = data
|
21
|
+
@keys = []
|
22
|
+
@properties = {}
|
23
|
+
|
24
|
+
DATA_INDICES.each do |key, indices|
|
25
|
+
indices = indices.dup
|
26
|
+
extract_data(key, *indices)
|
27
|
+
end
|
28
|
+
|
29
|
+
@alternatives = @alternatives.collect { |i| i[0] } if @alternatives
|
30
|
+
@keys.each { |key| @properties[key] = instance_variable_get("@#{key}") }
|
31
|
+
end
|
32
|
+
|
33
|
+
def to_h
|
34
|
+
@properties
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
def extract_data(name, *indices)
|
40
|
+
value = array_value(@raw, *indices)
|
41
|
+
return if value.nil?
|
42
|
+
instance_variable_set("@#{name}", value)
|
43
|
+
@keys.push(name)
|
44
|
+
end
|
45
|
+
|
46
|
+
def array_value(array, *indices)
|
47
|
+
return nil if array.nil?
|
48
|
+
index = indices.shift
|
49
|
+
value = array[index]
|
50
|
+
return value if indices.empty?
|
51
|
+
array_value(value, *indices)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
require 'concurrent'
|
2
|
+
require 'resolv'
|
3
|
+
require 'json'
|
4
|
+
|
5
|
+
module GoogleWebTranslate
|
6
|
+
# @private
|
7
|
+
SERVER_ATTRIBUTES = %i[host ip resolved_at last_used_at
|
8
|
+
counter available].freeze
|
9
|
+
|
10
|
+
Server = Struct.new(*SERVER_ATTRIBUTES) do
|
11
|
+
def to_json(*args)
|
12
|
+
result = {}
|
13
|
+
each_pair { |key, value| result[key] = value }
|
14
|
+
result.to_json(args)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
class ServerList
|
19
|
+
class << self
|
20
|
+
def servers
|
21
|
+
update_servers if @servers.nil?
|
22
|
+
@servers.dup
|
23
|
+
end
|
24
|
+
|
25
|
+
def next_server(rate_limit = nil)
|
26
|
+
@mutex ||= Mutex.new
|
27
|
+
@mutex.synchronize do
|
28
|
+
@counter ||= 0
|
29
|
+
@counter += 1
|
30
|
+
|
31
|
+
list = servers.sort_by { |i| i.counter || 0 }
|
32
|
+
server = list[0]
|
33
|
+
server.counter = @counter
|
34
|
+
sleep(rate_limit_delay(server, rate_limit))
|
35
|
+
server.last_used_at = Time.now
|
36
|
+
server
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
MAX_TTL = 86_400
|
43
|
+
|
44
|
+
def rate_limit_delay(server, rate_limit)
|
45
|
+
return 0 unless rate_limit && server.last_used_at
|
46
|
+
delay = rate_limit - (Time.now - server.last_used_at)
|
47
|
+
delay < 0 || ENV['TEST'] ? 0 : delay
|
48
|
+
end
|
49
|
+
|
50
|
+
def update_servers
|
51
|
+
server_list = read_server_data
|
52
|
+
pool = Concurrent::CachedThreadPool.new
|
53
|
+
# puts "updating #{server_list.length} servers"
|
54
|
+
server_list.each do |server|
|
55
|
+
pool.post { update_server(server) }
|
56
|
+
end
|
57
|
+
|
58
|
+
pool.shutdown
|
59
|
+
pool.wait_for_termination
|
60
|
+
@servers = unique_servers(server_list)
|
61
|
+
# puts "#{@servers.length} unique servers found"
|
62
|
+
save_server_data(server_list)
|
63
|
+
end
|
64
|
+
|
65
|
+
def update_server(server)
|
66
|
+
now = Time.now.to_i
|
67
|
+
if server.resolved_at.nil? ||
|
68
|
+
now - server.resolved_at > MAX_TTL || !server.available
|
69
|
+
server.resolved_at = now
|
70
|
+
server.ip = resolve_ip(server.host)
|
71
|
+
end
|
72
|
+
server.available = true
|
73
|
+
rescue Resolv::ResolvError
|
74
|
+
# puts "server #{server.host} is unavailable: #{e}"
|
75
|
+
server.available = false
|
76
|
+
end
|
77
|
+
|
78
|
+
def data_dir
|
79
|
+
File.join(__dir__, '..', '..', 'data')
|
80
|
+
end
|
81
|
+
|
82
|
+
def server_data_path
|
83
|
+
File.join(data_dir, 'server_data.txt')
|
84
|
+
end
|
85
|
+
|
86
|
+
def url_list_path
|
87
|
+
File.join(data_dir, 'urls.txt')
|
88
|
+
end
|
89
|
+
|
90
|
+
def hostnames
|
91
|
+
names = []
|
92
|
+
lines = File.read(url_list_path).split(/[\r\n]+/)
|
93
|
+
lines.each do |host|
|
94
|
+
next unless host && !host.empty?
|
95
|
+
names << "translate.#{host}"
|
96
|
+
end
|
97
|
+
names
|
98
|
+
end
|
99
|
+
|
100
|
+
def unique_servers(list)
|
101
|
+
server_by_ip = {}
|
102
|
+
list.each do |server|
|
103
|
+
next unless server.available
|
104
|
+
server_by_ip[server.ip] = server
|
105
|
+
end
|
106
|
+
server_by_ip.values
|
107
|
+
end
|
108
|
+
|
109
|
+
def initial_data
|
110
|
+
hostnames.collect do |host|
|
111
|
+
server = Server.new
|
112
|
+
server.host = host
|
113
|
+
server
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
def read_server_data
|
118
|
+
return initial_data unless File.exist?(server_data_path)
|
119
|
+
data = JSON.parse(File.read(server_data_path))
|
120
|
+
server_list = []
|
121
|
+
data.each do |entry|
|
122
|
+
attributes = SERVER_ATTRIBUTES.collect { |i| entry[i.to_s] }
|
123
|
+
server = Server.new(*attributes)
|
124
|
+
next unless server.host && !server.host.empty?
|
125
|
+
server.counter = 0
|
126
|
+
server_list << server
|
127
|
+
end
|
128
|
+
server_list
|
129
|
+
end
|
130
|
+
|
131
|
+
def save_server_data(servers)
|
132
|
+
File.write(server_data_path, servers.to_json)
|
133
|
+
end
|
134
|
+
|
135
|
+
def resolver
|
136
|
+
resolver = Resolv::DNS.new
|
137
|
+
resolver.timeouts = 5
|
138
|
+
resolver
|
139
|
+
end
|
140
|
+
|
141
|
+
def resolve_ip(host)
|
142
|
+
resolver.getaddress(host).to_s
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
@@ -1,3 +1,3 @@
|
|
1
|
-
module GoogleWebTranslate
|
2
|
-
VERSION = '0.2.
|
3
|
-
end
|
1
|
+
module GoogleWebTranslate
|
2
|
+
VERSION = '0.2.3'.freeze
|
3
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: google_web_translate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-01
|
11
|
+
date: 2018-02-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: concurrent-ruby
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: execjs
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -128,12 +142,14 @@ files:
|
|
128
142
|
- bin/console
|
129
143
|
- bin/google_web_translate
|
130
144
|
- bin/setup
|
145
|
+
- data/urls.txt
|
131
146
|
- google-web-translate.gemspec
|
132
147
|
- lib/google_web_translate.rb
|
133
148
|
- lib/google_web_translate/api.rb
|
134
149
|
- lib/google_web_translate/cli.rb
|
135
150
|
- lib/google_web_translate/http_client.rb
|
136
151
|
- lib/google_web_translate/result.rb
|
152
|
+
- lib/google_web_translate/server_list.rb
|
137
153
|
- lib/google_web_translate/string_escaping.rb
|
138
154
|
- lib/google_web_translate/version.rb
|
139
155
|
- lib/js/window.js
|