google_translate_scraper 0.1.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -12,7 +12,7 @@ Add this line to your application's Gemfile:
12
12
 
13
13
  And then execute:
14
14
 
15
- $ bundle
15
+ $ bundle install
16
16
 
17
17
  Or install it yourself as:
18
18
 
@@ -22,11 +22,11 @@ Or install it yourself as:
22
22
 
23
23
  1- require 'google_translate_scraper'
24
24
 
25
- 2- translator = GoogleTranslateScraper::Translator.new
25
+ 2- response = GoogleTranslateScraper.translate(:source_language => 'en', :target_language => 'sv', :search_text => 'hey')
26
26
 
27
- 3- translations = translations = translator.translate( source_language = "en", target_language = "sv", search_phrase = "sup fool" )
27
+ 3- response.translations.first
28
28
 
29
- NOTE: source and target language values must be a string containg the ISO 639-1 letter code for that language. For a list see here: http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
29
+ NOTE: source and target language values must be a string containing the ISO 639-1 two-letter code for that language. For a list see here: http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
30
30
 
31
31
  ## Contributing
32
32
 
@@ -1,6 +1,12 @@
1
1
  require "google_translate_scraper/version"
2
- require "google_translate_scraper/translator"
2
+ require "google_translate_scraper/models/translator"
3
+ require "google_translate_scraper/models/error"
4
+ require "google_translate_scraper/models/translation"
5
+ require "google_translate_scraper/models/response"
3
6
 
4
7
  module GoogleTranslateScraper
5
- # Your code goes here...
8
+
9
+ def self.translate args
10
+ Models::Translator.new(args).translate
11
+ end
6
12
  end
@@ -0,0 +1,17 @@
1
module GoogleTranslateScraper
  module Models

    # Value object describing one problem encountered while handling a
    # translation request.
    class Error

      # Human-readable description of the problem.
      attr_accessor :message

      # arguments - Hash of attribute name => value (e.g. :message => "...").
      #             Each pair is stored in the instance variable named after
      #             its key. Passing nil is treated like passing an empty
      #             Hash instead of raising NoMethodError.
      def initialize(arguments = {})
        (arguments || {}).each do |key, value|
          instance_variable_set("@#{key}", value)
        end
      end

    end

  end
end
@@ -0,0 +1,14 @@
1
module GoogleTranslateScraper
  module Models

    # Container for the outcome of a translation request: two lists, one for
    # translations and one for errors, both starting out empty.
    class Response

      attr_accessor :translations, :errors

      # Gives every new response its own pair of empty arrays.
      def initialize
        @translations = Array.new
        @errors = Array.new
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module GoogleTranslateScraper
  module Models

    # Value object pairing a search text with one translated text, together
    # with the language codes that were used for the request.
    class Translation

      attr_accessor :source_language, :target_language, :search_text, :translated_text

      # arguments - Hash of attribute name => value. Each pair is stored in
      #             the instance variable named after its key. Passing nil is
      #             treated like passing an empty Hash instead of raising
      #             NoMethodError.
      def initialize(arguments = {})
        (arguments || {}).each do |key, value|
          instance_variable_set("@#{key}", value)
        end
      end
    end
  end
end
@@ -0,0 +1,96 @@
1
module GoogleTranslateScraper
  module Models
    require 'net/http'
    require 'nokogiri'

    # Posts a search text to the Google Translate web page and scrapes the
    # translated text out of the returned HTML.
    #
    # The outcome is collected in a Response: Translation objects when text
    # was scraped, Error objects when a required argument is missing or no
    # translation could be found in the page.
    class Translator

      attr_accessor :source_language, :target_language, :search_text, :response

      # arguments - Hash of attribute name => value; each pair is stored in
      #             the instance variable named after its key. nil is treated
      #             like an empty Hash. A fresh Response is always assigned
      #             afterwards, so a :response key is overwritten.
      def initialize(arguments)
        (arguments || {}).each do |key, value|
          instance_variable_set("@#{key}", value)
        end
        @response = Response.new
      end

      # Validates the arguments and, when they are complete, scrapes the
      # translations. Always returns the Response, whether it holds
      # translations or errors.
      def translate
        populate_translations if valid?
        response
      end

      # Names of the attributes that must be set before translating.
      def required_variables
        %w[source_language target_language search_text]
      end

      private

      def valid?
        validate_required_variables
      end

      # Adds one error per required attribute whose instance variable is nil
      # or false. Returns true when the response holds no errors.
      def validate_required_variables
        required_variables.each do |name|
          next if instance_variable_get("@#{name}")
          add_error("Required argument missing: #{name}")
        end
        response.errors.empty?
      end

      # Scrapes the page and records an error when nothing usable came back.
      def populate_translations
        fetch_from_google
        add_error("No translations were found. Google might have changed their HTML") if response.translations.empty?
      end

      # Submits the translation form and returns the parsed HTML document.
      # Every call performs a new HTTP POST.
      def page
        form_fields = {
          :sl     => source_language,
          :tl     => target_language,
          :js     => "n",
          :prev   => "_t",
          # Previously written as `hv => hv`, which used the value "en" as the
          # key and therefore never sent the hv field.
          :hv     => "en",
          :ie     => "UTF-8",
          :layout => "2",
          :eotf   => "1",
          :text   => search_text,
          :file   => ""
        }
        http_response = Net::HTTP.post_form(URI.parse('http://translate.google.com/'), form_fields)
        Nokogiri::HTML(http_response.body)
      end

      # Downloads the page once and tries both scraping strategies on the
      # same document.
      def fetch_from_google
        html_doc = page
        get_dictionary_translation(html_doc)
        return unless response.translations.empty?

        fallback_text = get_non_dictionary_translation(html_doc).text
        # A blank scrape is not a translation; leaving the list empty lets
        # populate_translations report the "No translations" error.
        add_translation(fallback_text) unless fallback_text.strip.empty?
      end

      # when there are multiple translations
      #
      # Adds one translation per non-blank span with id "result_box" and
      # returns the texts that were added.
      def get_dictionary_translation(html_doc)
        texts = html_doc.xpath("//span[@id='result_box']").map { |node| node.text }
        texts = texts.reject { |text| text.strip.empty? }
        texts.each { |text| add_translation(text) }
        texts
      end

      # when there is a phrase with no dictionary translation i.e multiple possible translations
      #
      # Returns the span nodes directly below the element with id "result_box".
      def get_non_dictionary_translation(html_doc)
        html_doc.xpath('//*[@id="result_box"]/span')
      end

      # Appends a Translation for the current request to the response.
      def add_translation(translated_text)
        response.translations << Translation.new(
          :source_language => @source_language,
          :target_language => @target_language,
          :search_text     => @search_text,
          :translated_text => translated_text
        )
      end

      # Appends an Error carrying +error_message+ to the response.
      def add_error(error_message)
        response.errors << Error.new(:message => error_message)
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module GoogleTranslateScraper
2
- VERSION = "0.1.1"
2
+ VERSION = "1.0.0"
3
3
  end
@@ -1,31 +1,9 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe GoogleTranslateScraper do
4
- it 'should return an array' do
5
- translator = GoogleTranslateScraper::Translator.new
6
- results = translator.translate("en", "sv", "Hello")
7
- results.should be_a_kind_of Array
8
- end
9
-
10
- # dictionary translations
11
- it 'should return an array of Swedish translations for an English word' do
12
- translator = GoogleTranslateScraper::Translator.new
13
- results = translator.translate("en", "sv", "Hello")
14
- results.size.should > 1
15
- end
16
-
17
- # non-dictinary translations
18
- it 'should return a Swedish translations for a short English sentence' do
19
- translator = GoogleTranslateScraper::Translator.new
20
- results = translator.translate("en", "sv", "Hello there")
21
- results.size.should eql 1
22
- results[0].should eql"Hallå där"
23
- end
24
-
25
- it 'should return a Swedish translations for a long English sentence' do
26
- translator = GoogleTranslateScraper::Translator.new
27
- results = translator.translate("en","sv","Hi how are you? I'm good how are you?")
28
- results.size.should eql 1
29
- results[0].should eql"Hej hur mår du? Jag är bra hur mår du?"
4
+
5
+ it "scrapes and translates google translate" do
6
+ response = subject.translate(:source_language => 'en', :target_language => 'sv', :search_text => 'hey')
7
+ response.translations.first.translated_text.should == 'hej'
30
8
  end
31
9
  end
@@ -0,0 +1,25 @@
1
+ require 'spec_helper'
2
+
3
describe GoogleTranslateScraper::Models::Error do

  describe "attributes" do

    it "has a message" do
      subject.respond_to?(:message).should == true
    end
  end

  describe "initialize" do

    let(:arguments) { { :message => "You didn't provide a source language my man!" } }

    # Every key handed to the constructor must be readable back through the
    # accessor of the same name.
    it "sets all instance variables given" do
      error = subject.class.new(arguments)

      arguments.each do |attribute, expected|
        error.send(attribute).should == expected
      end
    end
  end

end
@@ -0,0 +1,36 @@
1
+ require 'spec_helper'
2
+
3
describe GoogleTranslateScraper::Models::Response do

  describe "errors" do

    let(:error) { GoogleTranslateScraper::Models::Error.new(:message => "You did something wrong!") }

    # Seed the list so the "first element" example has something to inspect.
    before { subject.errors << error }

    it "is iterable" do
      subject.errors.respond_to?(:each).should == true
    end

    it "is a list of error objects" do
      subject.errors.first.class.should == GoogleTranslateScraper::Models::Error
    end
  end

  describe "translations" do

    let(:translation) do
      GoogleTranslateScraper::Models::Translation.new(
        :source_language => 'sv',
        :target_language => 'en',
        :search_text     => 'Hej',
        :translated_text => 'Hey'
      )
    end

    # Seed the list so the "first element" example has something to inspect.
    before { subject.translations << translation }

    it "is iterable" do
      subject.translations.respond_to?(:each).should == true
    end

    it "is a list of translation objects" do
      subject.translations.first.class.should == GoogleTranslateScraper::Models::Translation
    end

  end

end
@@ -0,0 +1,27 @@
1
+ require 'spec_helper'
2
+
3
describe GoogleTranslateScraper::Models::Translation do

  describe "attributes" do

    # One example per public attribute of a translation.
    attribute_names = %w[ source_language target_language search_text translated_text ]

    attribute_names.each do |attribute|
      it "has a #{attribute}" do
        subject.respond_to?(attribute).should == true
      end
    end
  end

  describe "initialize" do

    let(:arguments) { { :source_language => 'sv', :target_language => 'en', :search_text => 'Hej!' } }

    # Every key handed to the constructor must be readable back through the
    # accessor of the same name.
    it "sets all instance variables given" do
      translation = subject.class.new(arguments)

      arguments.each do |attribute, expected|
        translation.send(attribute).should == expected
      end
    end
  end

end
@@ -0,0 +1,75 @@
1
+ require 'spec_helper'
2
+
3
describe GoogleTranslateScraper::Models::Translator do

  let(:arguments) { { :source_language => 'en', :target_language => 'sv', :search_text => 'hey' } }
  subject { GoogleTranslateScraper::Models::Translator.new(arguments) }

  describe "format of response" do

    it "returns a Response object" do
      subject.translate.class.should == GoogleTranslateScraper::Models::Response
    end
  end

  # In this group the subject is the Response produced by #translate, not the
  # translator itself.
  describe "successful translation" do

    context "word/dictionary translation" do

      let(:arguments) { { :source_language => 'en', :target_language => 'sv', :search_text => 'cat' } }
      subject { GoogleTranslateScraper::Models::Translator.new(arguments).translate }

      it "returns a translation" do
        subject.translations.first.translated_text.should == 'katt'
      end

    end

    context "phrase/non-dictionary translation" do

      let(:arguments) { { :source_language => 'en', :target_language => 'sv', :search_text => 'the little cat' } }
      subject { GoogleTranslateScraper::Models::Translator.new(arguments).translate }

      it "returns a translation" do
        translations = subject.translations
        translations.size.should == 1
        translations.first.translated_text.should == 'den lilla katten'
      end

    end
  end

  describe "non-successful translation" do

    let(:arguments) { { :source_language => 'en', :target_language => 'sv', :search_text => 'hello my friend' } }
    subject { GoogleTranslateScraper::Models::Translator.new(arguments) }

    context "a required argument was missing" do

      it "returns an error" do
        subject.source_language = nil

        first_error = subject.translate.errors.first
        first_error.message.should == "Required argument missing: source_language"
      end
    end

    context "multiple arguments are missing" do

      it "returns multiple errors" do
        subject.source_language = nil
        subject.target_language = nil

        subject.translate.errors.size.should == 2
      end
    end

    context "no translations were found" do

      # Stubbing the fetch leaves the translation list empty, which is the
      # condition that triggers the error.
      it "returns an error" do
        subject.stub(:fetch_from_google)

        first_error = subject.translate.errors.first
        first_error.message.should == "No translations were found. Google might have changed their HTML"
      end
    end
  end
end
metadata CHANGED
@@ -1,62 +1,56 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: google_translate_scraper
3
- version: !ruby/object:Gem::Version
4
- hash: 25
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 1
9
- - 1
10
- version: 0.1.1
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Seb Glazebrook
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2012-05-08 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2013-01-06 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: nokogiri
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
24
17
  none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- hash: 3
29
- segments:
30
- - 0
31
- version: "0"
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
32
22
  type: :runtime
33
- version_requirements: *id001
34
- - !ruby/object:Gem::Dependency
35
- name: rspec
36
23
  prerelease: false
37
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec
32
+ requirement: !ruby/object:Gem::Requirement
38
33
  none: false
39
- requirements:
34
+ requirements:
40
35
  - - ~>
41
- - !ruby/object:Gem::Version
42
- hash: 39
43
- segments:
44
- - 2
45
- - 10
46
- - 0
36
+ - !ruby/object:Gem::Version
47
37
  version: 2.10.0
48
38
  type: :development
49
- version_requirements: *id002
50
- description: "\"This gem scraps GoogleTranslate and returns the translation for the given search phrase. If multiple translations are available, multiple are returned.\""
51
- email:
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 2.10.0
46
+ description: ! '"This gem scraps GoogleTranslate and returns the translation for the
47
+ given search phrase. If multiple translations are available, multiple are returned."'
48
+ email:
52
49
  - me@sebglazebrook.com
53
50
  executables: []
54
-
55
51
  extensions: []
56
-
57
52
  extra_rdoc_files: []
58
-
59
- files:
53
+ files:
60
54
  - .gitignore
61
55
  - Gemfile
62
56
  - LICENSE
@@ -64,43 +58,45 @@ files:
64
58
  - Rakefile
65
59
  - google_translate_scraper.gemspec
66
60
  - lib/google_translate_scraper.rb
67
- - lib/google_translate_scraper/translator.rb
61
+ - lib/google_translate_scraper/models/error.rb
62
+ - lib/google_translate_scraper/models/response.rb
63
+ - lib/google_translate_scraper/models/translation.rb
64
+ - lib/google_translate_scraper/models/translator.rb
68
65
  - lib/google_translate_scraper/version.rb
69
66
  - spec/google_translate_scraper_spec.rb
67
+ - spec/models/error_spec.rb
68
+ - spec/models/response_spec.rb
69
+ - spec/models/translation_spec.rb
70
+ - spec/models/translator_spec.rb
70
71
  - spec/spec_helper.rb
71
- homepage: ""
72
+ homepage: ''
72
73
  licenses: []
73
-
74
74
  post_install_message:
75
75
  rdoc_options: []
76
-
77
- require_paths:
76
+ require_paths:
78
77
  - lib
79
- required_ruby_version: !ruby/object:Gem::Requirement
78
+ required_ruby_version: !ruby/object:Gem::Requirement
80
79
  none: false
81
- requirements:
82
- - - ">="
83
- - !ruby/object:Gem::Version
84
- hash: 3
85
- segments:
86
- - 0
87
- version: "0"
88
- required_rubygems_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ! '>='
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
85
  none: false
90
- requirements:
91
- - - ">="
92
- - !ruby/object:Gem::Version
93
- hash: 3
94
- segments:
95
- - 0
96
- version: "0"
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
97
90
  requirements: []
98
-
99
91
  rubyforge_project:
100
- rubygems_version: 1.8.9
92
+ rubygems_version: 1.8.24
101
93
  signing_key:
102
94
  specification_version: 3
103
- summary: "\"This gem scrapes Google Translate and their results.\""
104
- test_files:
95
+ summary: ! '"This gem scrapes Google Translate and their results."'
96
+ test_files:
105
97
  - spec/google_translate_scraper_spec.rb
98
+ - spec/models/error_spec.rb
99
+ - spec/models/response_spec.rb
100
+ - spec/models/translation_spec.rb
101
+ - spec/models/translator_spec.rb
106
102
  - spec/spec_helper.rb
@@ -1,62 +0,0 @@
1
- module GoogleTranslateScraper
2
- require 'net/http'
3
- require 'nokogiri'
4
-
5
- class Translator
6
-
7
- @@input_language = nil
8
- @@target_language = nil
9
- @@text = nil
10
-
11
- def translate(input_language, target_language, text )
12
- @@input_language = input_language
13
- @@target_language = target_language
14
- @@text = text
15
- get_translation
16
- end
17
-
18
- def get_translation
19
- # set POST variables
20
- js = "n"
21
- prev = "_t"
22
- hv = "en" # i think this is the language of the website
23
- ie = "UTF-8"
24
- layout = "2"
25
- eotf = "1"
26
- file = ""
27
-
28
- # download the webpage
29
- http_response = Net::HTTP.post_form(URI.parse('http://translate.google.com/'), {"sl" => @@input_language, "tl" => @@target_language, "js" => "n", "prev" => "_t", "hv" => "en", "ie" =>
30
- "UTF-8" , "layout" => "2", "eotf" => "1", "text" => @@text, "file" => ""})
31
- # get the string
32
- html_string = http_response.body
33
- # convert string to a Nokogiri document file
34
- html_doc = Nokogiri::HTML(html_string)
35
- # find the translation
36
- translation = Array.new
37
- translation = get_dictionary_translation(html_doc)
38
- # check for non-dictionary translations
39
- if translation.empty?
40
- translation[0] = get_non_dictionary_translation(html_doc).text
41
- end
42
- return translation
43
-
44
- end
45
-
46
- # when there are multiple translations
47
- def get_dictionary_translation html_doc
48
- translations = Array.new
49
- translation = html_doc.xpath('//div[@id="gt-res-dict"]/ol/li/div/div')
50
- translation.each do |t|
51
- translations.push t.text
52
- end
53
- translations
54
- end
55
-
56
- # when there is a phrase with no dictionary translation i.e multiple possible translations
57
- def get_non_dictionary_translation html_doc
58
- translation = html_doc.xpath('//*[@id="result_box"]/span')
59
- end
60
-
61
- end
62
- end