easy_captcha_solver 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/easy_captcha_solver.rb +40 -41
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: dcb594800ad2a3d11471aa0b1b96bf5a3cf9bf97
4
- data.tar.gz: 342afccdf933e33ff81699d93029c93458ac0fdb
3
+ metadata.gz: 0e812e7f2b327ace5217bdb9f41d62b36d20dafc
4
+ data.tar.gz: 25e4e3575a6a4e7cb8e0d0c9fa592780b18e8c41
5
5
  SHA512:
6
- metadata.gz: 49dcf36addcbd3e6b4985ec06afb85bedea30cf03eeb22f748c129ef5b539f5b64622bf78f5748b80b6cf89bb8e315c5ee979f75182f9a701ff33b3a01d6da9a
7
- data.tar.gz: 0ec408b14ec8bea14155a08b4758df777b9fe4bfaa3676ae0a02225a53a299fe2f8d902d5ee3f5ee4a8ab5ceff8077f03159692b9b09ac962aa270f98430f2d8
6
+ metadata.gz: b1a34ef70f02a8324523be85414bcce4c0a6f68caa5442d32dc6b98597fb37f7059820f970d64b08829d4d32c8d15d6c0d61711e00c9bfef570728c3fe773f05
7
+ data.tar.gz: 2bb91e8f255736e6b4703dc82b71761b632f11336356d409c54ae692b57558d64b02cfea9bcdd8ff94fb220c5add6b140f954299c8778b55eeafe99f87e93020
@@ -5,59 +5,58 @@ class EasyCaptchaSolver
5
5
  attr_reader :captcha
6
6
 
7
7
  def initialize ( options = {} )
8
- # Mechanize initializacion, pretends to be Mac Safari
9
- agent = Mechanize.new { |agent|
10
- agent.user_agent_alias = 'Mac Safari'
11
- }
8
+ image = options[:image_path] if options[:image_path]
9
+ image = get_captcha_image(options[:image_url]) if options[:image_url]
12
10
 
13
- @image = options[:image_path] if options[:image_path]
11
+ throw Exception.new "Image path or image URL must be provided.
12
+ Example: easy_c = EasyCaptcha.new( image_url: 'http://www.example.com/captcha' )
13
+ or easy_c = EasyCaptcha.new( image_path: './captcha.jpg' )" unless image
14
14
 
15
- # If URL, save a file instead of trying to solve the captcha from memory because of tesseract limitations with .png images
16
- if options[:image_url]
17
- image = agent.get(options[:image_url]).save! "./tmp_image"
18
-
19
- # Guess image extension and rename tmp file
20
- @image = "./tmp_image.#{get_image_extension(image)}"
21
- File.rename( image, @image)
22
- end
15
+ # Try to solve the captcha and delete temp img if necessary
16
+ solve_captcha(image) ensure File.delete(image) if options[:image_url] && File.exist?(image)
17
+ end
23
18
 
24
- unless @image
25
- throw Exception.new "A local image path or a image URL must be provided. Example: easy_c = EasyCaptcha.new( image_url: 'http://www.example.com/captcha_img.jpg')"
26
- end
19
+ private
20
+ def get_captcha_image( image_url )
21
+ agent = Mechanize.new { |agent|
22
+ agent.user_agent_alias = 'Mac Safari'
23
+ }
27
24
 
28
- solve_captcha ensure File.delete(@image) if options[:image_url]
25
+ # Save a file instead of trying to solve the captcha from memory
26
+ # because of tesseract limitations with .png images
27
+ image = agent.get(image_url).save! "./tmp_image"
28
+ image_extension = "./tmp_image.#{get_image_extension(image)}"
29
+ File.rename( image, image_extension)
29
30
 
31
+ image_extension
30
32
  end
31
33
 
32
- private
33
-
34
- def solve_captcha()
35
- # Solve captcha using Tesseract-ocr
34
+ def solve_captcha(image)
36
35
  e = Tesseract::Engine.new {|e|
37
36
  e.language = :eng
38
37
  e.blacklist = '|'
39
38
  }
40
-
41
- @captcha = e.text_for(@image).strip # => 'ABC'
39
+ @captcha = e.text_for(image).strip # => 'ABC'
42
40
  end
43
- end
44
41
 
45
- def get_image_extension(local_file_path)
46
- png = Regexp.new("\x89PNG".force_encoding("binary"))
47
- jpg = Regexp.new("\xff\xd8\xff\xe0\x00\x10JFIF".force_encoding("binary"))
48
- jpg2 = Regexp.new("\xff\xd8\xff\xe1(.*){2}Exif".force_encoding("binary"))
49
- case IO.read(local_file_path, 10)
50
- when /^GIF8/
51
- 'gif'
52
- when /^#{png}/
53
- 'png'
54
- when /^#{jpg}/
55
- 'jpg'
56
- when /^#{jpg2}/
57
- 'jpg'
58
- else
59
- mime_type = `file #{local_file_path} --mime-type`.gsub("\n", '') # Works on linux and mac
60
- raise UnprocessableEntity, "unknown file type" if !mime_type
61
- mime_type.split(':')[1].split('/')[1].gsub('x-', '').gsub(/jpeg/, 'jpg').gsub(/text/, 'txt').gsub(/x-/, '')
42
+ def get_image_extension(local_file_path)
43
+ png = Regexp.new("\x89PNG".force_encoding("binary"))
44
+ jpg = Regexp.new("\xff\xd8\xff\xe0\x00\x10JFIF".force_encoding("binary"))
45
+ jpg2 = Regexp.new("\xff\xd8\xff\xe1(.*){2}Exif".force_encoding("binary"))
46
+ case IO.read(local_file_path, 10)
47
+ when /^GIF8/
48
+ 'gif'
49
+ when /^#{png}/
50
+ 'png'
51
+ when /^#{jpg}/
52
+ 'jpg'
53
+ when /^#{jpg2}/
54
+ 'jpg'
55
+ else
56
+ mime_type = `file #{local_file_path} --mime-type`.gsub("\n", '') # Works on linux and mac
57
+ raise UnprocessableEntity, "unknown file type" if !mime_type
58
+ mime_type.split(':')[1].split('/')[1].gsub('x-', '').gsub(/jpeg/, 'jpg').gsub(/text/, 'txt').gsub(/x-/, '')
59
+ end
62
60
  end
61
+
63
62
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: easy_captcha_solver
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Rodriguez