easy_captcha_solver 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/easy_captcha_solver.rb +40 -41
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0e812e7f2b327ace5217bdb9f41d62b36d20dafc
|
4
|
+
data.tar.gz: 25e4e3575a6a4e7cb8e0d0c9fa592780b18e8c41
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b1a34ef70f02a8324523be85414bcce4c0a6f68caa5442d32dc6b98597fb37f7059820f970d64b08829d4d32c8d15d6c0d61711e00c9bfef570728c3fe773f05
|
7
|
+
data.tar.gz: 2bb91e8f255736e6b4703dc82b71761b632f11336356d409c54ae692b57558d64b02cfea9bcdd8ff94fb220c5add6b140f954299c8778b55eeafe99f87e93020
|
data/lib/easy_captcha_solver.rb
CHANGED
@@ -5,59 +5,58 @@ class EasyCaptchaSolver
|
|
5
5
|
attr_reader :captcha
|
6
6
|
|
7
7
|
def initialize ( options = {} )
|
8
|
-
|
9
|
-
|
10
|
-
agent.user_agent_alias = 'Mac Safari'
|
11
|
-
}
|
8
|
+
image = options[:image_path] if options[:image_path]
|
9
|
+
image = get_captcha_image(options[:image_url]) if options[:image_url]
|
12
10
|
|
13
|
-
|
11
|
+
throw Exception.new "Image path or image URL must be provided.
|
12
|
+
Example: easy_c = EasyCaptcha.new( image_url: 'http://www.example.com/captcha' )
|
13
|
+
or easy_c = EasyCaptcha.new( image_path: './captcha.jpg' )" unless image
|
14
14
|
|
15
|
-
#
|
16
|
-
if options[:image_url]
|
17
|
-
|
18
|
-
|
19
|
-
# Guess image extension and rename tmp file
|
20
|
-
@image = "./tmp_image.#{get_image_extension(image)}"
|
21
|
-
File.rename( image, @image)
|
22
|
-
end
|
15
|
+
# Try to solve the captcha and delete temp img if necessary
|
16
|
+
solve_captcha(image) ensure File.delete(image) if options[:image_url] && File.exist?(image)
|
17
|
+
end
|
23
18
|
|
24
|
-
|
25
|
-
|
26
|
-
|
19
|
+
private
|
20
|
+
def get_captcha_image( image_url )
|
21
|
+
agent = Mechanize.new { |agent|
|
22
|
+
agent.user_agent_alias = 'Mac Safari'
|
23
|
+
}
|
27
24
|
|
28
|
-
|
25
|
+
# Save a file instead of trying to solve the captcha from memory
|
26
|
+
# because of tesseract limitations with .png images
|
27
|
+
image = agent.get(image_url).save! "./tmp_image"
|
28
|
+
image_extension = "./tmp_image.#{get_image_extension(image)}"
|
29
|
+
File.rename( image, image_extension)
|
29
30
|
|
31
|
+
image_extension
|
30
32
|
end
|
31
33
|
|
32
|
-
|
33
|
-
|
34
|
-
def solve_captcha()
|
35
|
-
# Solve captcha using Tesseract-ocr
|
34
|
+
def solve_captcha(image)
|
36
35
|
e = Tesseract::Engine.new {|e|
|
37
36
|
e.language = :eng
|
38
37
|
e.blacklist = '|'
|
39
38
|
}
|
40
|
-
|
41
|
-
@captcha = e.text_for(@image).strip # => 'ABC'
|
39
|
+
@captcha = e.text_for(image).strip # => 'ABC'
|
42
40
|
end
|
43
|
-
end
|
44
41
|
|
45
|
-
def get_image_extension(local_file_path)
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
42
|
+
def get_image_extension(local_file_path)
|
43
|
+
png = Regexp.new("\x89PNG".force_encoding("binary"))
|
44
|
+
jpg = Regexp.new("\xff\xd8\xff\xe0\x00\x10JFIF".force_encoding("binary"))
|
45
|
+
jpg2 = Regexp.new("\xff\xd8\xff\xe1(.*){2}Exif".force_encoding("binary"))
|
46
|
+
case IO.read(local_file_path, 10)
|
47
|
+
when /^GIF8/
|
48
|
+
'gif'
|
49
|
+
when /^#{png}/
|
50
|
+
'png'
|
51
|
+
when /^#{jpg}/
|
52
|
+
'jpg'
|
53
|
+
when /^#{jpg2}/
|
54
|
+
'jpg'
|
55
|
+
else
|
56
|
+
mime_type = `file #{local_file_path} --mime-type`.gsub("\n", '') # Works on linux and mac
|
57
|
+
raise UnprocessableEntity, "unknown file type" if !mime_type
|
58
|
+
mime_type.split(':')[1].split('/')[1].gsub('x-', '').gsub(/jpeg/, 'jpg').gsub(/text/, 'txt').gsub(/x-/, '')
|
59
|
+
end
|
62
60
|
end
|
61
|
+
|
63
62
|
end
|