google_translate_scraper 0.1.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -12,7 +12,7 @@ Add this line to your application's Gemfile:
12
12
 
13
13
  And then execute:
14
14
 
15
- $ bundle
15
+ $ bundle install
16
16
 
17
17
  Or install it yourself as:
18
18
 
@@ -22,11 +22,11 @@ Or install it yourself as:
22
22
 
23
23
  1- require 'google_translate_scraper'
24
24
 
25
- 2- translator = GoogleTranslateScraper::Translator.new
25
+ 2- response = GoogleTranslateScraper.translate(:source_language => 'en', :target_language => 'sv', :search_text => 'hey')
26
26
 
27
- 3- translations = translations = translator.translate( source_language = "en", target_language = "sv", search_phrase = "sup fool" )
27
+ 3- response.translations.first
28
28
 
29
- NOTE: source and target language values must be a string containg the ISO 639-1 letter code for that language. For a list see here: http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
29
+ NOTE: source and target language values must be strings containing the two-letter ISO 639-1 code for that language. For a list see here: http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
30
30
 
31
31
  ## Contributing
32
32
 
@@ -1,6 +1,12 @@
1
1
  require "google_translate_scraper/version"
2
- require "google_translate_scraper/translator"
2
+ require "google_translate_scraper/models/translator"
3
+ require "google_translate_scraper/models/error"
4
+ require "google_translate_scraper/models/translation"
5
+ require "google_translate_scraper/models/response"
3
6
 
4
7
  module GoogleTranslateScraper
5
- # Your code goes here...
8
+
9
+ def self.translate args
10
+ Models::Translator.new(args).translate
11
+ end
6
12
  end
@@ -0,0 +1,17 @@
1
+ module GoogleTranslateScraper
2
+ module Models
3
+
4
+ class Error
5
+
6
+ attr_accessor :message
7
+
8
+ def initialize(arguments = {})
9
+ arguments.each do |key, value|
10
+ instance_variable_set("@#{key}", value)
11
+ end
12
+ end
13
+
14
+ end
15
+
16
+ end
17
+ end
@@ -0,0 +1,14 @@
1
+ module GoogleTranslateScraper
2
+ module Models
3
+
4
+ class Response
5
+
6
+ attr_accessor :errors, :translations
7
+
8
+ def initialize
9
+ @errors = []
10
+ @translations = []
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,15 @@
1
+ module GoogleTranslateScraper
2
+ module Models
3
+
4
+ class Translation
5
+
6
+ attr_accessor :source_language, :target_language, :search_text, :translated_text
7
+
8
+ def initialize(arguments = {})
9
+ arguments.each do |key, value|
10
+ instance_variable_set("@#{key}", value)
11
+ end
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,96 @@
1
+ module GoogleTranslateScraper
2
+ module Models
3
+ require 'net/http'
4
+ require 'nokogiri'
5
+
6
# Looks up a translation by POSTing a form to translate.google.com and
# scraping the HTML that comes back. The outcome -- translations and/or
# errors -- is collected in a Response object, which #translate returns.
#
# Network failures are not rescued here: anything raised by Net::HTTP
# propagates to the caller of #translate.
class Translator

  attr_accessor :source_language, :target_language, :search_text, :response

  # arguments - Hash of attribute name => value. Every pair is stored as an
  #             instance variable named after its key. A fresh Response is
  #             always assigned afterwards, so a :response key is overwritten.
  def initialize(arguments)
    arguments.each do |key, value|
      instance_variable_set("@#{key}", value)
    end
    @response = Response.new
  end

  # Validates the required arguments and, only when all are present, queries
  # Google. Returns the Response, which carries either translations or errors.
  def translate
    populate_translations if valid?
    response
  end

  # Names of the attributes that must be set before a lookup is attempted.
  def required_variables
    %w[source_language target_language search_text]
  end

  private

  # True when every required argument was supplied.
  def valid?
    validate_required_variables
  end

  # Records one error per missing (nil/false) required attribute and reports
  # whether the response is still free of errors.
  def validate_required_variables
    required_variables.each do |name|
      add_error("Required argument missing: #{name}") unless instance_variable_get("@#{name}")
    end
    response.errors.empty?
  end

  # Runs the lookup and records an error when it produced nothing.
  def populate_translations
    fetch_from_google
    add_error("No translations were found. Google might have changed their HTML") if response.translations.empty?
  end

  # POSTs the translation form and returns the reply parsed by Nokogiri.
  # Every call performs a new HTTP request.
  def page
    form_fields = {
      :sl     => source_language,
      :tl     => target_language,
      :js     => "n",
      :prev   => "_t",
      :hv     => "en", # previously keyed by the value itself ("en" => "en")
      :ie     => "UTF-8",
      :layout => "2",
      :eotf   => "1",
      :text   => search_text,
      :file   => ""
    }
    http_response = Net::HTTP.post_form(URI.parse('http://translate.google.com/'), form_fields)
    Nokogiri::HTML(http_response.body)
  end

  # Downloads the result page once and tries the dictionary lookup first,
  # falling back to the phrase lookup when that yields no translation.
  def fetch_from_google
    html_doc = page # a single request serves both lookups
    get_dictionary_translation(html_doc)
    return unless response.translations.empty?

    phrase = get_non_dictionary_translation(html_doc).text
    # An empty scrape is not a translation; leaving the list empty lets
    # populate_translations report the failure.
    add_translation(phrase) unless blank_text?(phrase)
  end

  # when there are multiple translations
  # Adds one translation per non-empty result_box span found in html_doc.
  def get_dictionary_translation html_doc
    html_doc.xpath("//span[@id='result_box']").each do |node|
      add_translation(node.text) unless blank_text?(node.text)
    end
  end

  # when there is a phrase with no dictionary translation i.e multiple possible translations
  # Returns the spans nested directly inside the result_box element.
  def get_non_dictionary_translation html_doc
    html_doc.xpath('//*[@id="result_box"]/span')
  end

  # True when text is nil or contains nothing but whitespace.
  def blank_text? text
    text.to_s.strip.empty?
  end

  # Appends a Translation for the current request to the response.
  def add_translation translated_text
    response.translations << Translation.new(
      :source_language => @source_language,
      :target_language => @target_language,
      :search_text     => @search_text,
      :translated_text => translated_text
    )
  end

  # Appends an Error carrying error_message to the response.
  def add_error error_message
    response.errors << Error.new(:message => error_message)
  end
end
95
+ end
96
+ end
@@ -1,3 +1,3 @@
1
1
  module GoogleTranslateScraper
2
- VERSION = "0.1.1"
2
+ VERSION = "1.0.0"
3
3
  end
@@ -1,31 +1,9 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe GoogleTranslateScraper do
4
- it 'should return an array' do
5
- translator = GoogleTranslateScraper::Translator.new
6
- results = translator.translate("en", "sv", "Hello")
7
- results.should be_a_kind_of Array
8
- end
9
-
10
- # dictionary translations
11
- it 'should return an array of Swedish translations for an English word' do
12
- translator = GoogleTranslateScraper::Translator.new
13
- results = translator.translate("en", "sv", "Hello")
14
- results.size.should > 1
15
- end
16
-
17
- # non-dictinary translations
18
- it 'should return a Swedish translations for a short English sentence' do
19
- translator = GoogleTranslateScraper::Translator.new
20
- results = translator.translate("en", "sv", "Hello there")
21
- results.size.should eql 1
22
- results[0].should eql"Hallå där"
23
- end
24
-
25
- it 'should return a Swedish translations for a long English sentence' do
26
- translator = GoogleTranslateScraper::Translator.new
27
- results = translator.translate("en","sv","Hi how are you? I'm good how are you?")
28
- results.size.should eql 1
29
- results[0].should eql"Hej hur mår du? Jag är bra hur mår du?"
4
+
5
+ it "scrapes and translates google translate" do
6
+ response = subject.translate(:source_language => 'en', :target_language => 'sv', :search_text => 'hey')
7
+ response.translations.first.translated_text.should == 'hej'
30
8
  end
31
9
  end
@@ -0,0 +1,25 @@
1
+ require 'spec_helper'
2
+
3
+ describe GoogleTranslateScraper::Models::Error do
4
+
5
+ describe "attributes" do
6
+
7
+ it "has a message" do
8
+ subject.respond_to?(:message).should == true
9
+ end
10
+ end
11
+
12
+ describe "initialize" do
13
+
14
+ let(:arguments) { {:message => "You didn't provide a source language my man!"} }
15
+
16
+ it "sets all instance variables given" do
17
+
18
+ translation = subject.class.new(arguments)
19
+ arguments.keys.each do |key|
20
+ translation.send(key).should == arguments[key]
21
+ end
22
+ end
23
+ end
24
+
25
+ end
@@ -0,0 +1,36 @@
1
+ require 'spec_helper'
2
+
3
+ describe GoogleTranslateScraper::Models::Response do
4
+
5
+ describe "errors" do
6
+
7
+ before do
8
+ subject.errors << GoogleTranslateScraper::Models::Error.new(:message => "You did something wrong!")
9
+ end
10
+
11
+ it "is iterable" do
12
+ subject.errors.respond_to?(:each).should == true
13
+ end
14
+
15
+ it "is a list of error objects" do
16
+ subject.errors.first.class.should == GoogleTranslateScraper::Models::Error
17
+ end
18
+ end
19
+
20
+ describe "translations" do
21
+
22
+ before do
23
+ subject.translations << GoogleTranslateScraper::Models::Translation.new(:source_language => 'sv', :target_language => 'en', :search_text => 'Hej', :translated_text => 'Hey')
24
+ end
25
+
26
+ it "is iterable" do
27
+ subject.translations.respond_to?(:each).should == true
28
+ end
29
+
30
+ it "is a list of translation objects" do
31
+ subject.translations.first.class.should == GoogleTranslateScraper::Models::Translation
32
+ end
33
+
34
+ end
35
+
36
+ end
@@ -0,0 +1,27 @@
1
+ require 'spec_helper'
2
+
3
+ describe GoogleTranslateScraper::Models::Translation do
4
+
5
+ describe "attributes" do
6
+
7
+ %w[ source_language target_language search_text translated_text ].each do |attribute|
8
+ it "has a #{attribute}" do
9
+ subject.respond_to?(attribute).should == true
10
+ end
11
+ end
12
+ end
13
+
14
+ describe "initialize" do
15
+
16
+ let(:arguments) { {:source_language => 'sv', :target_language => 'en', :search_text => 'Hej!'} }
17
+
18
+ it "sets all instance variables given" do
19
+
20
+ translation = subject.class.new(arguments)
21
+ arguments.keys.each do |key|
22
+ translation.send(key).should == arguments[key]
23
+ end
24
+ end
25
+ end
26
+
27
+ end
@@ -0,0 +1,75 @@
1
+ require 'spec_helper'
2
+
3
+ describe GoogleTranslateScraper::Models::Translator do
4
+
5
+ let(:arguments) {{:source_language => 'en', :target_language => 'sv', :search_text => 'hey'}}
6
+ let(:subject) { GoogleTranslateScraper::Models::Translator.new(arguments)}
7
+
8
+ describe "format of response" do
9
+
10
+ it "returns a Response object" do
11
+ response = subject.translate
12
+ response.class.should == GoogleTranslateScraper::Models::Response
13
+ end
14
+ end
15
+
16
+ describe "successful translation" do
17
+
18
+ context "word/dictionary translation" do
19
+
20
+ let(:arguments) {{:source_language => 'en', :target_language => 'sv', :search_text => 'cat'}}
21
+ let(:subject) { GoogleTranslateScraper::Models::Translator.new(arguments).translate }
22
+
23
+ it "returns a translation" do
24
+ subject.translations.first.translated_text.should == 'katt'
25
+ end
26
+
27
+ end
28
+
29
+ context "phrase/non-dictionary translation" do
30
+
31
+ let(:arguments) {{:source_language => 'en', :target_language => 'sv', :search_text => 'the little cat'}}
32
+ let(:subject) { GoogleTranslateScraper::Models::Translator.new(arguments).translate }
33
+
34
+ it "returns a translation" do
35
+ subject.translations.size.should == 1
36
+ subject.translations.first.translated_text.should == 'den lilla katten'
37
+ end
38
+
39
+ end
40
+ end
41
+
42
+ describe "non-successful translation" do
43
+
44
+ let(:arguments) { {:source_language => 'en', :target_language => 'sv', :search_text => 'hello my friend'} }
45
+ let(:subject) { GoogleTranslateScraper::Models::Translator.new(arguments)}
46
+
47
+ context "a required argument was missing" do
48
+
49
+ it "returns an error" do
50
+ subject.instance_variable_set("@source_language", nil)
51
+ response = subject.translate
52
+ response.errors.first.message.should == "Required argument missing: source_language"
53
+ end
54
+ end
55
+
56
+ context "multiple arguments are missing" do
57
+
58
+ it "returns multiple errors" do
59
+ subject.instance_variable_set("@source_language", nil)
60
+ subject.instance_variable_set("@target_language", nil)
61
+ response = subject.translate
62
+ response.errors.size.should == 2
63
+ end
64
+ end
65
+
66
+ context "no translations were found" do
67
+
68
+ it "returns an error" do
69
+ subject.stub(:fetch_from_google)
70
+ response = subject.translate
71
+ response.errors.first.message.should == "No translations were found. Google might have changed their HTML"
72
+ end
73
+ end
74
+ end
75
+ end
metadata CHANGED
@@ -1,62 +1,56 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: google_translate_scraper
3
- version: !ruby/object:Gem::Version
4
- hash: 25
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 1
9
- - 1
10
- version: 0.1.1
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Seb Glazebrook
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2012-05-08 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2013-01-06 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: nokogiri
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
24
17
  none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- hash: 3
29
- segments:
30
- - 0
31
- version: "0"
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
32
22
  type: :runtime
33
- version_requirements: *id001
34
- - !ruby/object:Gem::Dependency
35
- name: rspec
36
23
  prerelease: false
37
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec
32
+ requirement: !ruby/object:Gem::Requirement
38
33
  none: false
39
- requirements:
34
+ requirements:
40
35
  - - ~>
41
- - !ruby/object:Gem::Version
42
- hash: 39
43
- segments:
44
- - 2
45
- - 10
46
- - 0
36
+ - !ruby/object:Gem::Version
47
37
  version: 2.10.0
48
38
  type: :development
49
- version_requirements: *id002
50
- description: "\"This gem scraps GoogleTranslate and returns the translation for the given search phrase. If multiple translations are available, multiple are returned.\""
51
- email:
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 2.10.0
46
+ description: ! '"This gem scraps GoogleTranslate and returns the translation for the
47
+ given search phrase. If multiple translations are available, multiple are returned."'
48
+ email:
52
49
  - me@sebglazebrook.com
53
50
  executables: []
54
-
55
51
  extensions: []
56
-
57
52
  extra_rdoc_files: []
58
-
59
- files:
53
+ files:
60
54
  - .gitignore
61
55
  - Gemfile
62
56
  - LICENSE
@@ -64,43 +58,45 @@ files:
64
58
  - Rakefile
65
59
  - google_translate_scraper.gemspec
66
60
  - lib/google_translate_scraper.rb
67
- - lib/google_translate_scraper/translator.rb
61
+ - lib/google_translate_scraper/models/error.rb
62
+ - lib/google_translate_scraper/models/response.rb
63
+ - lib/google_translate_scraper/models/translation.rb
64
+ - lib/google_translate_scraper/models/translator.rb
68
65
  - lib/google_translate_scraper/version.rb
69
66
  - spec/google_translate_scraper_spec.rb
67
+ - spec/models/error_spec.rb
68
+ - spec/models/response_spec.rb
69
+ - spec/models/translation_spec.rb
70
+ - spec/models/translator_spec.rb
70
71
  - spec/spec_helper.rb
71
- homepage: ""
72
+ homepage: ''
72
73
  licenses: []
73
-
74
74
  post_install_message:
75
75
  rdoc_options: []
76
-
77
- require_paths:
76
+ require_paths:
78
77
  - lib
79
- required_ruby_version: !ruby/object:Gem::Requirement
78
+ required_ruby_version: !ruby/object:Gem::Requirement
80
79
  none: false
81
- requirements:
82
- - - ">="
83
- - !ruby/object:Gem::Version
84
- hash: 3
85
- segments:
86
- - 0
87
- version: "0"
88
- required_rubygems_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ! '>='
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
85
  none: false
90
- requirements:
91
- - - ">="
92
- - !ruby/object:Gem::Version
93
- hash: 3
94
- segments:
95
- - 0
96
- version: "0"
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
97
90
  requirements: []
98
-
99
91
  rubyforge_project:
100
- rubygems_version: 1.8.9
92
+ rubygems_version: 1.8.24
101
93
  signing_key:
102
94
  specification_version: 3
103
- summary: "\"This gem scrapes Google Translate and their results.\""
104
- test_files:
95
+ summary: ! '"This gem scrapes Google Translate and their results."'
96
+ test_files:
105
97
  - spec/google_translate_scraper_spec.rb
98
+ - spec/models/error_spec.rb
99
+ - spec/models/response_spec.rb
100
+ - spec/models/translation_spec.rb
101
+ - spec/models/translator_spec.rb
106
102
  - spec/spec_helper.rb
@@ -1,62 +0,0 @@
1
- module GoogleTranslateScraper
2
- require 'net/http'
3
- require 'nokogiri'
4
-
5
- class Translator
6
-
7
- @@input_language = nil
8
- @@target_language = nil
9
- @@text = nil
10
-
11
- def translate(input_language, target_language, text )
12
- @@input_language = input_language
13
- @@target_language = target_language
14
- @@text = text
15
- get_translation
16
- end
17
-
18
- def get_translation
19
- # set POST variables
20
- js = "n"
21
- prev = "_t"
22
- hv = "en" # i think this is the language of the website
23
- ie = "UTF-8"
24
- layout = "2"
25
- eotf = "1"
26
- file = ""
27
-
28
- # download the webpage
29
- http_response = Net::HTTP.post_form(URI.parse('http://translate.google.com/'), {"sl" => @@input_language, "tl" => @@target_language, "js" => "n", "prev" => "_t", "hv" => "en", "ie" =>
30
- "UTF-8" , "layout" => "2", "eotf" => "1", "text" => @@text, "file" => ""})
31
- # get the string
32
- html_string = http_response.body
33
- # convert string to a Nokogiri document file
34
- html_doc = Nokogiri::HTML(html_string)
35
- # find the translation
36
- translation = Array.new
37
- translation = get_dictionary_translation(html_doc)
38
- # check for non-dictionary translations
39
- if translation.empty?
40
- translation[0] = get_non_dictionary_translation(html_doc).text
41
- end
42
- return translation
43
-
44
- end
45
-
46
- # when there are multiple translations
47
- def get_dictionary_translation html_doc
48
- translations = Array.new
49
- translation = html_doc.xpath('//div[@id="gt-res-dict"]/ol/li/div/div')
50
- translation.each do |t|
51
- translations.push t.text
52
- end
53
- translations
54
- end
55
-
56
- # when there is a phrase with no dictionary translation i.e multiple possible translations
57
- def get_non_dictionary_translation html_doc
58
- translation = html_doc.xpath('//*[@id="result_box"]/span')
59
- end
60
-
61
- end
62
- end