easy_captcha_solver 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/easy_captcha_solver.rb +40 -41
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: dcb594800ad2a3d11471aa0b1b96bf5a3cf9bf97
4
- data.tar.gz: 342afccdf933e33ff81699d93029c93458ac0fdb
3
+ metadata.gz: 0e812e7f2b327ace5217bdb9f41d62b36d20dafc
4
+ data.tar.gz: 25e4e3575a6a4e7cb8e0d0c9fa592780b18e8c41
5
5
  SHA512:
6
- metadata.gz: 49dcf36addcbd3e6b4985ec06afb85bedea30cf03eeb22f748c129ef5b539f5b64622bf78f5748b80b6cf89bb8e315c5ee979f75182f9a701ff33b3a01d6da9a
7
- data.tar.gz: 0ec408b14ec8bea14155a08b4758df777b9fe4bfaa3676ae0a02225a53a299fe2f8d902d5ee3f5ee4a8ab5ceff8077f03159692b9b09ac962aa270f98430f2d8
6
+ metadata.gz: b1a34ef70f02a8324523be85414bcce4c0a6f68caa5442d32dc6b98597fb37f7059820f970d64b08829d4d32c8d15d6c0d61711e00c9bfef570728c3fe773f05
7
+ data.tar.gz: 2bb91e8f255736e6b4703dc82b71761b632f11336356d409c54ae692b57558d64b02cfea9bcdd8ff94fb220c5add6b140f954299c8778b55eeafe99f87e93020
@@ -5,59 +5,58 @@ class EasyCaptchaSolver
5
5
  attr_reader :captcha
6
6
 
7
7
  def initialize ( options = {} )
8
- # Mechanize initializacion, pretends to be Mac Safari
9
- agent = Mechanize.new { |agent|
10
- agent.user_agent_alias = 'Mac Safari'
11
- }
8
+ image = options[:image_path] if options[:image_path]
9
+ image = get_captcha_image(options[:image_url]) if options[:image_url]
12
10
 
13
- @image = options[:image_path] if options[:image_path]
11
+ throw Exception.new "Image path or image URL must be provided.
12
+ Example: easy_c = EasyCaptcha.new( image_url: 'http://www.example.com/captcha' )
13
+ or easy_c = EasyCaptcha.new( image_path: './captcha.jpg' )" unless image
14
14
 
15
- # If URL, save a file instead of trying to solve the captcha from memory because of tesseract limitations with .png images
16
- if options[:image_url]
17
- image = agent.get(options[:image_url]).save! "./tmp_image"
18
-
19
- # Guess image extension and rename tmp file
20
- @image = "./tmp_image.#{get_image_extension(image)}"
21
- File.rename( image, @image)
22
- end
15
+ # Try to solve the captcha and delete temp img if necessary
16
+ solve_captcha(image) ensure File.delete(image) if options[:image_url] && File.exist?(image)
17
+ end
23
18
 
24
- unless @image
25
- throw Exception.new "A local image path or a image URL must be provided. Example: easy_c = EasyCaptcha.new( image_url: 'http://www.example.com/captcha_img.jpg')"
26
- end
19
+ private
20
+ def get_captcha_image( image_url )
21
+ agent = Mechanize.new { |agent|
22
+ agent.user_agent_alias = 'Mac Safari'
23
+ }
27
24
 
28
- solve_captcha ensure File.delete(@image) if options[:image_url]
25
+ # Save a file instead of trying to solve the captcha from memory
26
+ # because of tesseract limitations with .png images
27
+ image = agent.get(image_url).save! "./tmp_image"
28
+ image_extension = "./tmp_image.#{get_image_extension(image)}"
29
+ File.rename( image, image_extension)
29
30
 
31
+ image_extension
30
32
  end
31
33
 
32
- private
33
-
34
- def solve_captcha()
35
- # Solve captcha using Tesseract-ocr
34
+ def solve_captcha(image)
36
35
  e = Tesseract::Engine.new {|e|
37
36
  e.language = :eng
38
37
  e.blacklist = '|'
39
38
  }
40
-
41
- @captcha = e.text_for(@image).strip # => 'ABC'
39
+ @captcha = e.text_for(image).strip # => 'ABC'
42
40
  end
43
- end
44
41
 
45
- def get_image_extension(local_file_path)
46
- png = Regexp.new("\x89PNG".force_encoding("binary"))
47
- jpg = Regexp.new("\xff\xd8\xff\xe0\x00\x10JFIF".force_encoding("binary"))
48
- jpg2 = Regexp.new("\xff\xd8\xff\xe1(.*){2}Exif".force_encoding("binary"))
49
- case IO.read(local_file_path, 10)
50
- when /^GIF8/
51
- 'gif'
52
- when /^#{png}/
53
- 'png'
54
- when /^#{jpg}/
55
- 'jpg'
56
- when /^#{jpg2}/
57
- 'jpg'
58
- else
59
- mime_type = `file #{local_file_path} --mime-type`.gsub("\n", '') # Works on linux and mac
60
- raise UnprocessableEntity, "unknown file type" if !mime_type
61
- mime_type.split(':')[1].split('/')[1].gsub('x-', '').gsub(/jpeg/, 'jpg').gsub(/text/, 'txt').gsub(/x-/, '')
42
+ def get_image_extension(local_file_path)
43
+ png = Regexp.new("\x89PNG".force_encoding("binary"))
44
+ jpg = Regexp.new("\xff\xd8\xff\xe0\x00\x10JFIF".force_encoding("binary"))
45
+ jpg2 = Regexp.new("\xff\xd8\xff\xe1(.*){2}Exif".force_encoding("binary"))
46
+ case IO.read(local_file_path, 10)
47
+ when /^GIF8/
48
+ 'gif'
49
+ when /^#{png}/
50
+ 'png'
51
+ when /^#{jpg}/
52
+ 'jpg'
53
+ when /^#{jpg2}/
54
+ 'jpg'
55
+ else
56
+ mime_type = `file #{local_file_path} --mime-type`.gsub("\n", '') # Works on linux and mac
57
+ raise UnprocessableEntity, "unknown file type" if !mime_type
58
+ mime_type.split(':')[1].split('/')[1].gsub('x-', '').gsub(/jpeg/, 'jpg').gsub(/text/, 'txt').gsub(/x-/, '')
59
+ end
62
60
  end
61
+
63
62
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: easy_captcha_solver
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Rodriguez