textract 0.0.21 → 0.0.22
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/textract.rb +2 -1
- data/lib/textract/version.rb +1 -1
- data/spec/fixtures/vcr_cassettes/santorum.yml +259 -0
- data/spec/lib/textract_spec.rb +8 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6a2771d60be29638b54f000fcd2cca8b4597858d
|
4
|
+
data.tar.gz: 301ca958701a51b3150d67194d4b196c05b17493
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5af8545f9287d0643ea2ae29998941d6e5df3cae0d4c9e2cd511115a7acaa1858801bfe8b43565ff8fc85e01d9a68d3dfc5f90bfaf00afb10bb66fe3e9990579
|
7
|
+
data.tar.gz: 2356e3a912ef6922c8c455441b0f951df1760ee5ca94bddcbbf4c8285af7b36346ba4e2034b404c4d05db90895194c7ccde5599f43eba723e86d759e6df26eb3
|
data/lib/textract.rb
CHANGED
@@ -36,7 +36,7 @@ module Textract
|
|
36
36
|
elsif !description.nil? and article.count == 0
|
37
37
|
els = [1,2,3]
|
38
38
|
i = 1
|
39
|
-
until els.count < 2
|
39
|
+
until els.count < 2 or i > description.split(" ").length
|
40
40
|
search_text = description.split(" ")[0..i].join(" ")
|
41
41
|
if search_text.index "'"
|
42
42
|
els = doc.search "[text()*=\"#{search_text}\"]"
|
@@ -49,6 +49,7 @@ module Textract
|
|
49
49
|
el = els[0]
|
50
50
|
article_el = el.parent
|
51
51
|
else
|
52
|
+
article_el = doc
|
52
53
|
# do something else if multiple or no matches
|
53
54
|
end
|
54
55
|
else
|
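data/lib/textract/version.rb
CHANGED
(diff body not captured in this extract; per the summary above this file changed +1 -1 for the 0.0.21 → 0.0.22 version bump)
data/spec/fixtures/vcr_cassettes/santorum.yml
ADDED
@@ -0,0 +1,259 @@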
|
|
1
|
+
---
|
2
|
+
http_interactions:
|
3
|
+
- request:
|
4
|
+
method: get
|
5
|
+
uri: http://www.ricksantorum.com/about_rick
|
6
|
+
body:
|
7
|
+
encoding: US-ASCII
|
8
|
+
string: ''
|
9
|
+
headers:
|
10
|
+
Accept-Encoding:
|
11
|
+
- gzip,deflate,identity
|
12
|
+
Accept:
|
13
|
+
- "*/*"
|
14
|
+
User-Agent:
|
15
|
+
- Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML,
|
16
|
+
like Gecko) Version/5.1.1 Safari/534.51.22
|
17
|
+
Accept-Charset:
|
18
|
+
- ISO-8859-1,utf-8;q=0.7,*;q=0.7
|
19
|
+
Accept-Language:
|
20
|
+
- en-us,en;q=0.5
|
21
|
+
Host:
|
22
|
+
- www.ricksantorum.com
|
23
|
+
Connection:
|
24
|
+
- keep-alive
|
25
|
+
Keep-Alive:
|
26
|
+
- 300
|
27
|
+
response:
|
28
|
+
status:
|
29
|
+
code: 200
|
30
|
+
message: OK
|
31
|
+
headers:
|
32
|
+
Date:
|
33
|
+
- Wed, 27 May 2015 21:42:56 GMT
|
34
|
+
Content-Type:
|
35
|
+
- text/html; charset=utf-8
|
36
|
+
Transfer-Encoding:
|
37
|
+
- chunked
|
38
|
+
Connection:
|
39
|
+
- keep-alive
|
40
|
+
Set-Cookie:
|
41
|
+
- __cfduid=dde41833c269c045244f76a32431da7e51432762976; expires=Thu, 26-May-16
|
42
|
+
21:42:56 GMT; path=/; domain=.ricksantorum.com; HttpOnly
|
43
|
+
- _nbuild_session=2b39847a7762391b9cee0cb09fa24255; path=/; HttpOnly
|
44
|
+
Access-Control-Allow-Origin:
|
45
|
+
- "*"
|
46
|
+
Access-Control-Request-Method:
|
47
|
+
- GET, POST, PUT, DELETE
|
48
|
+
X-Ua-Compatible:
|
49
|
+
- IE=Edge,chrome=1
|
50
|
+
Etag:
|
51
|
+
- W/"0996aa5b70c35c6c84d92f6c57f2476d"
|
52
|
+
Cache-Control:
|
53
|
+
- max-age=0, private, must-revalidate
|
54
|
+
X-Request-Id:
|
55
|
+
- ce09fd4ab2530bab13c1fde8b2d97916
|
56
|
+
X-Runtime:
|
57
|
+
- '0.135160'
|
58
|
+
X-Rack-Cache:
|
59
|
+
- miss
|
60
|
+
X-Powered-By:
|
61
|
+
- Phusion Passenger 4.0.48
|
62
|
+
Status:
|
63
|
+
- 200 OK
|
64
|
+
Vary:
|
65
|
+
- Accept-Encoding
|
66
|
+
Server:
|
67
|
+
- cloudflare-nginx
|
68
|
+
Cf-Ray:
|
69
|
+
- 1ed4e47c84fb06af-EWR
|
70
|
+
Content-Encoding:
|
71
|
+
- gzip
|
72
|
+
body:
|
73
|
+
encoding: ASCII-8BIT
|
74
|
+
string: !binary |-
|
75
|
+
H4sIAAAAAAAAA8xa6XPbNhb/HP0VKDtTSVOTFKnLskVnbSdN3EkT7ybpNs1k
|
76
|
+
PBAJUZBJgAZAHTn+950HkDJ1OJE7Wzf+YJHAew+/d+JibfjDk1fnb95dPkUT
|
77
|
+
lSYntSH8oASzOLAIs6CB4OikNkyJwiicYCGJCqxcje1D6NXNE6Uym9zkdBZY
|
78
|
+
f9hvT+1znmZY0VFCLBRypghTgXXxNCBRTFZcDKcksGaUzDMuVIVwTiM1CSIy
|
79
|
+
oyGx9csBoowqihNbhjghgbchRE1ISuyQJ1xU5Pzo9fzDzrnlntSGiqqEnJyO
|
80
|
+
eK7QL1ykRKDXhGHFBfoPDa/Ra8wUF3k6dA1lDdWGCWXXaCLIOLBAwyPXlQWV
|
81
|
+
w7CinI1ymkREOCFPXQ1Bur7b7XY7pO1HZISjXqcfDXot+PPclouVwuEkJUxJ
|
82
|
+
1+u0fa87OPR9eOr3vF635UZkjPNEGWGODKW0kCBJYEm1TIicEKK0NjIUNFNI
|
83
|
+
LTNQniyUO8UzbFqtkxkW6EqOr6TCQmUqaDAyR0+wIo1m04mJekNT0mgOXUO/
|
84
|
+
bsmY8zghtqSK2DMi6JiGWtWKWd+++/3Z4uL59OPg8nn3+cXLV9fk+vX0ostG
|
85
|
+
z/99Oe9g8WccPpu9VfziUIM1kVNy41xNCFM0pGp5pfg1YVYxdCjF2M6wwOkO
|
86
|
+
ttd//Pzq6U369tf24X9DX7wTyfM/xc+dy19fXifzj6/+TJ79+dto5sXPBQ/W
|
87
|
+
BJohQKB2p7ZmiBlnNMSJte7e+XzuCBper9wMjsUQM1fQfAvLyH8DkVIxzLej
|
88
|
+
ayNsnxDjgnX7rnEgKpE2gyRihhWdERTyNKVKkQgpjgSRigvKYqQmBJ2mRNAQ
|
89
|
+
MxQJglM05gJNsIjmXFwDSdktnRWQTPCMCLUMLB4f6dC/n0Irk6wJykVSEbOn
|
90
|
+
dXdKir43Ey2zqoWwUDRMdE27DS+a4phcSRFuhFfUZof4MBO8P5ukCydMeB6N
|
91
|
+
BWfKYUStaosLuQeFBAa/0rKkywWNKcOJbrR1ozPN4sdQO9qdzqDTu8MTmnTb
|
92
|
+
F/8EFBB2BYF/pyPBGZeCSBoRtk+hqyEEte7lGQrg3+fP6NOX41oNoZdnDmEz
|
93
|
+
KjiDYosCZGWCR3mow+jYEGQ4JhcR9LV7ZdsLepNTaLt9LoRWquU3MD165LrD
|
94
|
+
H96fPzl9c/q+9ujRozllEZ87V1D5TMXTVoCRdxTD490sQH2vGghyXPfDh5Mq
|
95
|
+
9NsYrUwo60GKp3jhmFkAZ1SaJJ3ihZvQkXSnNzkRy5y6nuO1nFY564U5uJky
|
96
|
+
XhDYOXX05FUxkn5PSURxYOEkgZQpLClFGFjfikksJVHSTbRX3BRTZkeHuE98
|
97
|
+
cjjGfuThXieKDjEeEey3cc/r9A+d6TqCqpv29ydChTfUXCmBAtQY50xHEmpE
|
98
|
+
B/KARk30qYaQiUV1gKbyAI2nEgUogtn2aUL0jH+2fIPjlzglDdl83/oAEYcQ
|
99
|
+
HaNGlepseRE1aNRsIkFULtgxmsogckJBsCIFTUM2odmhUUAjI2YqncKIWYLV
|
100
|
+
mIvUUXMogGZtMqdRTJQEexwDNCfDgjD1kkfEoVA21RkZc0EaBfSmkWogrKv/
|
101
|
+
+TNqQEJ9QlfkCL3/cIAEwdHyCJVGaYybn5ByroiT5XLSGDfRF/RFC/zSiHiY
|
102
|
+
gwYHyCrMe4CsAqg9n0qr2bxPpumU/eXsNMt0Htc9v9dvD3peq+95g163fj9Z
|
103
|
+
K7c2qv7MOPixwL3hh7rhrjePUcYdkAwoNmTXdSeWSxaiACmRE2PcjIPLgMF1
|
104
|
+
daKZnNP+mko3S3LJGXGmsm7oAYyOqRLLztBaQYIIQ3c7OuMHyLj5S7NxD6OX
|
105
|
+
uZpLohW+pkojDqOPPXYdQ4Ttn1pKLD+9KYQkHEeN5vGXEKtw0iDNT1/2FrMy
|
106
|
+
z1WMb1Bgfj5/Ru+LHIN3E4zv61eSqNMw5DlT9QNUf3tq9/x+r9f3e7ZX/2Di
|
107
|
+
/tEmwxMO5QbsCzyMM1KSbstOEj5/Qdk1EfUD7e2Scpv2PJeKp79joPQOUP1S
|
108
|
+
+wqG0Iui+gFq78/89tk5cI5xIgHlPTgvcUy0aYH/DEsaVvnXuJXA4TXQw6Zt
|
109
|
+
tw1WJC84jmCjock03XaCGbfFeK8Ui/FXUizGO1IM/mJcpFmjDhOcPKqjoDJa
|
110
|
+
ws32xskEVzzkCXqMCkLXlTKpoyPzbtav9Sb6GdWLVLUxw8lS0dBMkjG+Tdb/
|
111
|
+
d7rGuEzXMmER+gv5cUNmMD7kSPlY5MldDgI2kpB0Hw8ZIZq8tPlX7BwESC85
|
112
|
+
5JGFHpePritJmAtioSPTdOS6+swA7G45NzlmClb5pkzqVz2pVUbeEQa6vYid
|
113
|
+
LfMUNNpfYQZT2z1cZniB707XweAHmuRO/6H1swwzHx6FWESVlbLM0xSL5VWC
|
114
|
+
RUzMWnzzEKRghHV2hfFfsMa+YwNasvyFjd9uQZEuldWznB2bvrt4/8mtHlod
|
115
|
+
+OjlMc6yhNiK5+HEpiEAkvQjkYHV7S+6/Xvt64ol8p3HQtVDodXeCjZTvX7b
|
116
|
+
bw3cTSi2huBkzGy5DNXGAccmzwp+r7XotR4Ovtfehq8hrMH32vvC7/uLvv+A
|
117
|
+
8Hvb8DWEdfhmw7sP/N6i33tA+DuCR0NYh7938HheZ+F5nYdTwPe3FShArKng
|
118
|
+
+3ur4LcWnv+AGeB3d6hgQKyr0N1bhU5n4XUe0gv9HSoYEOsq9PdWoesvvO4D
|
119
|
+
JnLb26GCAbGmQtvbW4XD1sI7fMBAand2qGBArKvQ2VDBwDZrQ71scDMWPxzu
|
120
|
+
btsd45mG2/YX7XV7d9srg+rO7wm6P+gOVtC9QWfhDarxDt230WC6vyf4bb/b
|
121
|
+
W8Ef9BaD9arf7a3A687vC3qr62IWCU4jO5wInhLbG/gLb7AePa1uxQG6+/vS
|
122
|
+
otu6jZ/ewttwQOsWPHRuQE8xo2MiN09n/060A7cc1JlKzm6RmgVCZcWeSihF
|
123
|
+
xc2g/YYm5Hzz6rXldfwe3ljpb/NdbFxPpFLRhFSnl83NAqi4fdM71n96Hf+N
|
124
|
+
3fDLMwdn2TlnYxo7uSTiQr7gcUyiCzhj1+cnx5WttVtcf494tERhgqUMLNjP
|
125
|
+
2OWGBs18W4NCcKFgw6j2CE5RzDv8k7ac8HnRCmfSNAqstYsvZK7ZiTipDbHu
|
126
|
+
DiM74TEv3e9a5eATGkWE2WmEiqcktk6GNI3N0dzfGye+7x/2PReQ2ZQllJFV
|
127
|
+
TJsuC+Fka9O2ea0zdPHeei5kqadMH1LPw17H6FlRENruoSDDs1JFxTOb4RnE
|
128
|
+
cg4fWST0ZIhLlauR8BshSm+0QcTQTegGcaH2rl21myXm84pL/Yve8Vyg37ki
|
129
|
+
Xxf1lU8aIL0EHeWKWCfnq+eKOFcr4zI8q/ozJSy3laBxTETp1x9b1slQZpiV
|
130
|
+
7l3RkYUCtVk+dKF/N5Uu5HCwrCm0dXViQsLA6QmmayxwOVTUBpNcEZ19NX8A
|
131
|
+
fdE/UgyNFLPlTY4F0Y+jhIfXpjEPQwL3WIXrqhaq3ZoIMT6vFTAjOitOFyQx
|
132
|
+
t0b66i2wRji8jgXPWWQuUY9QLhJzSvnNwN66oDXG2LqiLensjEc0T68YFoLP
|
133
|
+
V9e1XvewM+jXm8emJIV8Bh4rDIEZTTGcq4xxRC70p0AVM45i24xpA1OCl6Ym
|
134
|
+
I8btFIsY/GGkgeAxwQpO9dZFgPEwZUQgkATFxJ4LnG1QCT7f4kvshbQ93/it
|
135
|
+
mkUr8OY0zob51Cq++imzqjaceCXZLdpNbau4taaArxT1tYOxoTvxVn4vf/bX
|
136
|
+
ZyJO7subRnYPmrOyMSE4sk4e4uBs6GY7gZawkLHuiCulj/3WU1FOsDAzJBRI
|
137
|
+
ZIKzUiNLOpJiqu+LS2fDu+JHj2U+mpJQBbcO/gnm6eBrVbJSbZGCs1QVWFej
|
138
|
+
BEOg1IarMaXURQdJaYOhckbVEl4kYRFUIl3+dIIXhbVkLE4zq3ALONUbWaqL
|
139
|
+
k6vmhKjHuUj2hPwTlMuqujOKg83z3V06SB7C13OCxHkCJ93SXuH8ii5jHJIR
|
140
|
+
59oyO2agsluD1M50skn2ON9TmX3tv439FtcW+GJKKqruRipt/BQFeTMsV9OH
|
141
|
+
Lk6mEqQ0isxnPhuEeg1qqliYECzGdIEmGIJmRoQp7FtxH/IkTw0nFKnNJWJZ
|
142
|
+
jIFI2roqfLMGrOqH1u1/AAAA//+8W8ly3MgR/ZUMXnRpMtQKj21O+EJxPBK9
|
143
|
+
iSGOpHMCSDTKLFQiamkM5qR/8Em/py9xvCz0Qo48c/NJYqOWrMx8L1+hs78F
|
144
|
+
yW/PG7vLPxP+43e1nxPP6/aH8nn8yIUc9ciw9a+nKyI7zwrvX6b/CwnRW4HL
|
145
|
+
qREJxLRX14oNxyr2l5eU6Ovn/1DKHDrMLtM6QpNQyrHsdh6fu0B5cImk1aDj
|
146
|
+
skEHU0aahB151wtxJtlLXLDUTohDR73bDTYgK0lIJQrlgTNBf1VRhYPXL7ao
|
147
|
+
jzqu286D0uzSgHmdUkk437hy6uo6rA97ZuytPb36jhbhmDb0d1S4DUGj4Jy1
|
148
|
+
4CWMSfhqj2YNncS+eGoH57so4Xv6q3e/cCPop/2bDmFDP3Bw4jf0wJEH+idH
|
149
|
+
xxu6lyxxQ/ecgVIz4S5xI97z0bi7QK9ebl9tau3DELOHWr3sIWmkq/M100dE
|
150
|
+
IG2IaRc5paiak81Qu0BYClR61f4U1aeZ0bsatpmXdMqUevLfyZOeR+edpKtn
|
151
|
+
BpEbJ26xPlMoYyMR+9e4u3ZDlXXM0BpE9jWGsNSlVCRRs9CojfPuF8sdnCuO
|
152
|
+
do6zo+7Vl5BF4qFcPkfFzAYLZGbH+ZS672UqDW7LgWDTmkpP7hrYCkM/BIeT
|
153
|
+
PGTOOFkNjtneSItes8egcyDb6ISPcyQmS8fOdeHr5y+ZehGPlV1cx8PIRnDM
|
154
|
+
KFOUJCFLd0VvXXoS1mmKyq2ldMvjxG4XMAfYc6H1xaK4d8kZYCqSNFhq3+nM
|
155
|
+
Xz9/SXR9Tdag4aTmCfI/5RJspb1rs8alAlVsErVc2pKkQrzlzFPxcMfgRkse
|
156
|
+
yOZYAjg6Zc4FZ5UoNAgVn033+QWAoe3WRqwbB+HoF/oDjc57+H6veNSVI0ed
|
157
|
+
hWiKDt/PgjBwRfgfsW7EO4G3jSFSjnqWojSIn7BAV1ohPjxeueiKPgmNJWWS
|
158
|
+
nyfDz4S+eUAHcxFQ9v4MQc1CElotkXeV2lq0GmJzjB05Rsc72Rz5YVPZLIoQ
|
159
|
+
ohun6JI8oyOf9NkZ5tWoKOgyX3e3OI4cSs9tru5CsUVi/wZH1tT7N1YbubOk
|
160
|
+
cCHoHiI7UVPWdD9wZ/V6dVb9dB3tFFXhPQzCztjDEiou37JrctFlmp33cLiE
|
161
|
+
HVoy6Tl91ExEfLDcWcU40BfGrA1zWYlp4jzA3ilqmsRY45Q4SShr2Q1HvqHs
|
162
|
+
RnmWNNbo0B0y/cPVwxW91ZLML+8PIFzBa2Vle329xebb6+vvajDP59ptZS1A
|
163
|
+
GIGRr16+/NMJDMD4kShWVGL6qCnTevVFQdlB29TO2ogmh1GikQ48Mzgw83JV
|
164
|
+
C0NWfSQN9InT4MIua3iRaNK5FqY0iXEsxDAYfT1G3dKWH4SQpqsbTsnyItEt
|
165
|
+
Ty4rJtu5TQVUJh3lwOZY6eINIwd6ekBZvKhpJz9PmlBaBqFbDbsoKVWKf83B
|
166
|
+
4g7vPX1yDy+863uwYQJbs0eW4Zi2L6DBgdCyCygCSl4NaIF3J3s8h27k+Eif
|
167
|
+
xPeo3u/NhXTTWnpvr6//WG2EpJHRnCXdgYKswJ8gnpW88L4WwnldMKr3NV8N
|
168
|
+
xvWZxsdeY/sMz6caBJljc9qBx8lpkI687FzyFZ0rWrPnefXbIC5oSZXwOiD6
|
169
|
+
mDv3aIuHM13MA900Gm0NTrDR41+LzGuNgW48dOBd6NHaQ/er3NIAh2wQLqGL
|
170
|
+
D6HB0I+uzW40F3x06iW0so6C4TbyVseGrbbclOySufXCiiCAM8iJvARlypxk
|
171
|
+
RQgCY++6wt4cv2dfqj+OANgXHyRy48EzlQW0dZIXClJVSiMHuSjdUz/3AHvG
|
172
|
+
GNwZcF+g3uGnTJTYpI87xwg11jpEoCU2ZPV4pmFz0ppG99Sw59BKR01Bh68Z
|
173
|
+
zGSyymUZaS9ZDRlN5D2KHBryLfMrcK0IB+v+sbegG2pLtvXTJFVyrS4K1Mle
|
174
|
+
vE4mmC7ssY5QiiukuIPoK5M5rIXDtKcfZNQ24ukoobMt4ITj4o3zANFPKPFR
|
175
|
+
Wo1dZX5U7ucE9OTekOqrF3POvYSQFr/n4PgFXhdXBvoWlwrcVxU0aAlrr7R4
|
176
|
+
E0cIKIn40Vmi21V9otKs7OjXvLcQHEKED+wt8qOl5YNMubLPdrsBuxof58gh
|
177
|
+
GciRNagnGUrByI7pVn1HnzgipKhkSiNeAmfteHkBpKDdrt5yKr/h9Q74JNCH
|
178
|
+
h8u7FFm8w5dGtfBtVgYy92oeKi4eluiY1s5XhmTNS8XGETp3kQP9GEU6mBU6
|
179
|
+
eigmMOqweXDtADec6oBfzvJ64mTBSJPLa+W2H+yZSoHcQ1ovZ8r1dUnDSp+9
|
180
|
+
i6Nffq0rWg5BM1SNzmTmZSVtDD4Q7Xbhpll40vDra1O9kBzuR61eVr9YnJjw
|
181
|
+
+uCQ+h29xuXGxMEb1+fv6a3O9C4I/cPl7IXeuOjpp0MUpaN3JdKPUAaLbXUX
|
182
|
+
oCIMGf+qbY7PViR8ixnwS6FKfc4mGM5MzWpfb5ezWwN2IOb09fOXQ528BM8k
|
183
|
+
WrSEnQAQ0G3rTlWfgySxxH8BAAD//9xdW2/cuhF+PvkVrICenIdob/b6FltB
|
184
|
+
4pMELtLGtZ1TtEUhcCXuWrFW0pG0thdF/3sxM5REUpeVN+s0qB+8Ei/D4YjX
|
185
|
+
b4Yc2mX7JHWPh1DFmzTI4uWajY9oaZDRhE9rLvGY4KiFnXOV3kMXuxUpfJks
|
186
|
+
ZzMYxn2+hrJocIWFJrAZ+372SnIASx/hs0NoBX/mWKfJaDyldQi026/xGmZ9
|
187
|
+
eA/jeyATkngXQRqqElsGKfdWIUwuKBUYkOMohD13sT4TvrHzxIzYgqDpIXM+
|
188
|
+
bD5oW2kM9kC1ZSIETmEmx45NU7kchCaj0ZT9RTywv8fpHQPTZigQVkYiDEXK
|
189
|
+
LnJ2w+9gpSibxqtqayq3ntkJextlD6LcP5yjIOOi29GXeQdTz3kcAth1rm7R
|
190
|
+
TtiVkNtiJIDrDVkGdZq/xemdgcrWkaAwuCPUFSAa2mM4v8bQrhhEEQAC8W9O
|
191
|
+
hzLaALDS+EHRniA4pBFtAqcQZcqW9kEBN+2b4PCzQIySjUKrU6lySgyxhkKy
|
192
|
+
7VDIa+BO0/rsSA7fBTduF1QBFG8tJx1pvgHW+4tJPnXKaVfqgFYRkAKipwDa
|
193
|
+
1AXvgYhe8U6Ymr045TBXGnDtfikP6niQZMZB3yD7qBJoZ8EiWiU2zFpGv4Sg
|
194
|
+
Ut12u1coJUnEoI45Gb22nBcfRc6+JABLZaBy3SMNVfOwuXtklwYyg2vZDPIY
|
195
|
+
WccQGQvHFEE1x6jaLlabLUV+G/tn1uXn6xuLcdxcnFlDiMyGlDKzACkhg5El
|
196
|
+
QEIJT3NMYfs852B3ECWrXJqgNBzalMYmNCZatHt44pn1oV4IDKVu4LdQ3gdr
|
197
|
+
US09QQ9uHrfk6OoDsrW4qtiGjip37AAu933YDku5yhbjB1kS8vUJHIt6bTmn
|
198
|
+
iXMaAliMn1XPaDmf0bqfhwzDmRf74nSIyZ3TWWpUSc+r2POUPZVeoMEbafkK
|
199
|
+
Dpksk1Dk4syK53M0ykgK1Socw5BFQeZS1j1Fz4ZEgjGzbdoiTeMUz8CVBemp
|
200
|
+
SA1D4XqMqsYp4htKAJMBaPlVEsYqiUPXx48oh6xieEptWEBZDg5CsFcDORWi
|
201
|
+
12mRXBQJA90lqnXSOCwGHbUQEhuF/RPD/mWxJOSeuI3BmOTM0ootBCtzl8LE
|
202
|
+
0lW51V+fTVzZakajVdWGTNH9I0i0Bru11BrK0iRYizelWXCi94ndylGrTzEj
|
203
|
+
PvAUQXjDKAYtZQpmiPvyCg6cBcrO86c4iNjNrWAfYBNt8mx+O0lK70xGZXbX
|
204
|
+
0frbRYHVYDYsln9gBBQJL38jx42zvYOfMy9OxBnpBtwkjedBKP44Odc7ZSHV
|
205
|
+
glCzWJ3GRUZ9laWvRtl1sIjYKsHlRlnFVsHhjzo44uQHz8bS5A+2zYYuTI8B
|
206
|
+
D91i4gjRgNKNV/kArw+ybQcOA8JiB5K05VMSU6mQaiD1x67UkktajMF/yYZk
|
207
|
+
p8xQJZQFSeW5O9H4UepqCgMJoT7fth1NEmVMZRJQ0Tudx3GORmhEGlOX1aRI
|
208
|
+
hQU9ujQeC+LITVehyMyklcQ601brPipRtX0zWn5h1EAcUx2MTqMSIptFKSOj
|
209
|
+
C2VLe0/tYaBpptgwyHJ7FeECwWcVKRsi9E6n2M3gEUQ1UumT0rrLtORSUqL1
|
210
|
+
h/Kuv9ZISXPLXdBDDdHacmBYK9fDhABsTzQSD5nlfIJ1d87gZXtSdNDXcr4k
|
211
|
+
HmiRUesa5ZsIlo9kUVM8twy23709LETuBtF9HN4L33Jgl3Ih3zbUq0NQhv3s
|
212
|
+
GsBa0PhuT1E1D9UNaLejV5oSWM47QIJASVyGbU+2aCFv81yAAiGiJrI9wRQU
|
213
|
+
R42YwZWMgU3ht8kWDUDo08eglAOsuTA5YAvBu+XxY7Zq2py6oKeHb/yWFGHn
|
214
|
+
+NpZnU5J+Ty7ncVwmtxBm/DyfQPJftSTOMDGg6TpZSd0Scsh+f5UvWwg3kGx
|
215
|
+
slNxixb/sTJd+X8YFXF+93IcbGBJqtnSNx6oIHUBTYCVcb1Cu7d4YdkMFzjU
|
216
|
+
UFsdZjQHhO1Wt5sr9gmQ7DiCWxagWh9k/m+q3slwGERg95dyAk5UJGXrqpUk
|
217
|
+
e9ftQ4wasap2FwWJb6oefD0Va95J7TTwd5u63RCBDTWrOlhzx1Qe5cq9adXr
|
218
|
+
B5kX8mBZLpAb+zMZzGZLO57PM5HbhFCjubt8kBFax1foJDzwAZO1Z/Gj3tET
|
219
|
+
/fNc8gBGhhQ0p82HjTSRaLmr4wBlcXArY8v3H1pOEzRYk7pShDbsmYJK1inu
|
220
|
+
8Ltqd16kYj9DhtekLmyu54C9DUN2BakzdiVInT/oyVoVboy+dITNHGvbRJSk
|
221
|
+
wT331oYIGbukcHYZh4G31jOb8mtptZ3llqO6US6sKLmnNoENBap9Q5WK+uVS
|
222
|
+
4QPkotIoGxJeCjszBWAOkfmtGMwEXwy82HKuwRpgtkbM553gC3YeD2o8trTb
|
223
|
+
aCbP73QWB822BtFYzjleUuTT/EZK+XcU31V8G0JSPqo4iI4HNCMkNQiAqFWw
|
224
|
+
wSleZUhwCcQZGAGqPF1AllRsQPlgmAAVr/L8TDGYqsNq8bT0UZh4vgYZaTzq
|
225
|
+
IWvfIOdv1saak0gl+H6ziaFyLb6T/KBVa288j9Ncr++iXX1yRXWdaZ96Foek
|
226
|
+
jFruSjf6tC/XpgNtqYh2asfsQo1dgKZxo7OQXTRY6LkRv3f9lD+ofU89kBvy
|
227
|
+
XKRgUlMCXMrc4PkQHizI8lFbp5cV13+U7wB2dzaeAJJm1aqsyjG9G89SeMlW
|
228
|
+
MzwC28IG/CmPxgtNK0apjdCXOTNphNrC4U8vYAMW1q8UfQlX5Wj8ebLkAzy6
|
229
|
+
YTmfSRFxga/fRfqTkTsZuTl/dJOQR5YzGQ0nI3bDH9llyKP+8qm9mAUVRqJu
|
230
|
+
yn245dwFK15YWxfGo1cUzi4gfIcFB8tlsEix27hkYGo5F1WYNHXeYYGqxYGL
|
231
|
+
OqYMN7+VGcJvGLjTIpfLOHIzEWXChb2b4L4bz10Z7iEq9d5f0SUXOyw443A5
|
232
|
+
Y76GwsCAzXKuZQhYqH0K5srJfJXKVoWVTWgep25+K1wujTJcsNKAiQk6z3s6
|
233
|
+
mbLDcgvLYNcXs9xyrgtD4Z/Zr2KGiGS/gp53DOmJPj/XQNIFVPcTT+3FLMKP
|
234
|
+
wSraRK13RFyBsX9T0esdkecIY7s8IpivEdfeUVFPA7r7Ffq8Tbc3xPxcjbcT
|
235
|
+
je4nodpLe1xbOPwZnaoZx96aI516F5q9oyK64e33Jbzdr7gntcKNP0iNdOn6
|
236
|
+
4lrT7ncswj3fzoIILl3GZg6Gi4lIWxrkbn506TYqZDcKRqdR08Q2f5TGH2Oa
|
237
|
+
JOUXnkYDxdc1vHfR0T9Akyif80sYfSFeBBFAQ4uIBbUV0lPZ3Q2jGoffVZVR
|
238
|
+
KCpUDYUqjxpnT9VCqJB7J2OaIgKxd1ah8ZqKoZO/J6oRerOnQCImc4qOYMum
|
239
|
+
BOnoGisdfNgAKaBNkrxvTr2WrMN4TN5Jths/YTMY2/OUJ4bjhf8tV+QDZhDx
|
240
|
+
+x+RLby4w7dxhYsnxX9AHlfBQMznwssHyyD6oRm0szDwxQ/HpopB/miM0dBW
|
241
|
+
40pimpUt38yGpRskkrholVa65JnP3oK3g4soAL8FNe8NH94N4BTmL4WzDY6O
|
242
|
+
cvDvhBnOcl7JNN4tjyIRfklDcMHQKReZFEdDq8gvL9igMsAHQxFxL1I4SE8R
|
243
|
+
L+8ng8nLssw4vgtEQ57H+WwZ0iNFYLj0JgT/Ko8V/iuWvWK6L6adOmLCVJJY
|
244
|
+
3R2TjB2g8zDFK1PdGwW5agJPFMPCcrhaZMDdfiJyv1wPM/+u8mzR12WT6mGp
|
245
|
+
8FOBTmGIvP01y/y7l7qTJTAfVfvDpqsHNT9cthDT/SO+P93fGx15++P54dHh
|
246
|
+
wbHvH3nHfHo0nu49zQXXUxjIg2i99IQtHY7Nxgez2eh4b+9gPhKH89G+mIqx
|
247
|
+
fzyZHM1GswPR0xfYi59Og3kKF+aoY0OjciSjiyHchAepm9zyTLjjN5T5bGwx
|
248
|
+
8mFqjS24J2Fxm+OjcWSGFWdmhpRPX0+0MPuClc5byOnOC8Z++jf7nXtefmIl
|
249
|
+
9nSeL7/yy/fnH29uvljsPxBt+BuBlyhWxkMYctp4K29cHQ6T4FGEphMWDHzD
|
250
|
+
B+OzWtlKzStx4PWpfwUaHs/ostRSt1ox1VMSpd+n1UKgWxv5pHh/anBqQ47a
|
251
|
+
GOvl1IZy5OQ/ptH5UJmk3tkhNMAh94y9JN5s9JAkUiUf+I3KJcz2y0s4cUb+
|
252
|
+
VwMfeu90Oj3eF+Pp1PdH072D6d7haLx/6B2ozDVQwGJsuIUGiJQLcwgdACcD
|
253
|
+
kRVvwVyjRV7JunJ8m58jmbN9SMu7nB3BC6zRP6IPJnYe+3SZ1ZWAW1UE6jdu
|
254
|
+
+KJY1W9sQvX2UThfTMAvlfIiPUEWtcboje2HJV1uq5Imt1UJ39ZlVeWtCh1V
|
255
|
+
WcNhzheDJU+DKCtGrfReDKfTg4PJoX90eDw6OPYnvvBxHTOuZpwtPmvHB01K
|
256
|
+
91U1b3ND0ELDL7mf/i8AAAD//wMAzykju5B6AAA=
|
257
|
+
http_version:
|
258
|
+
recorded_at: Wed, 27 May 2015 21:42:56 GMT
|
259
|
+
recorded_with: VCR 2.9.3
|
data/spec/lib/textract_spec.rb
CHANGED
@@ -139,4 +139,12 @@ describe Textract do
|
|
139
139
|
end
|
140
140
|
end
|
141
141
|
|
142
|
+
it "handles the santorum loop bug" do
|
143
|
+
VCR.use_cassette('santorum') do
|
144
|
+
url = "http://www.ricksantorum.com/about_rick"
|
145
|
+
text = Textract.get_text(url)
|
146
|
+
expect(text.title).to eq "About Former Senator Rick Santorum"
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
142
150
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.21
|
4
|
+
version: 0.0.22
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Pash
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-05-
|
11
|
+
date: 2015-05-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: opengraph_parser
|
@@ -175,6 +175,7 @@ files:
|
|
175
175
|
- spec/fixtures/vcr_cassettes/json.yml
|
176
176
|
- spec/fixtures/vcr_cassettes/og.yml
|
177
177
|
- spec/fixtures/vcr_cassettes/robots.yml
|
178
|
+
- spec/fixtures/vcr_cassettes/santorum.yml
|
178
179
|
- spec/fixtures/vcr_cassettes/selector.yml
|
179
180
|
- spec/fixtures/vcr_cassettes/site_bug.yml
|
180
181
|
- spec/fixtures/vcr_cassettes/stackoverflow.yml
|
@@ -218,6 +219,7 @@ test_files:
|
|
218
219
|
- spec/fixtures/vcr_cassettes/json.yml
|
219
220
|
- spec/fixtures/vcr_cassettes/og.yml
|
220
221
|
- spec/fixtures/vcr_cassettes/robots.yml
|
222
|
+
- spec/fixtures/vcr_cassettes/santorum.yml
|
221
223
|
- spec/fixtures/vcr_cassettes/selector.yml
|
222
224
|
- spec/fixtures/vcr_cassettes/site_bug.yml
|
223
225
|
- spec/fixtures/vcr_cassettes/stackoverflow.yml
|