pocong 1.0.0__tar.gz → 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pocong-1.0.0 → pocong-1.0.1}/PKG-INFO +1 -1
- {pocong-1.0.0 → pocong-1.0.1}/src/pocong/_version.py +3 -3
- {pocong-1.0.0 → pocong-1.0.1}/src/pocong/proxy_spiders/__init__.py +21 -23
- {pocong-1.0.0 → pocong-1.0.1}/src/pocong.egg-info/PKG-INFO +1 -1
- {pocong-1.0.0 → pocong-1.0.1}/MANIFEST.in +0 -0
- {pocong-1.0.0 → pocong-1.0.1}/README.md +0 -0
- {pocong-1.0.0 → pocong-1.0.1}/pyproject.toml +0 -0
- {pocong-1.0.0 → pocong-1.0.1}/setup.cfg +0 -0
- {pocong-1.0.0 → pocong-1.0.1}/setup.py +0 -0
- {pocong-1.0.0 → pocong-1.0.1}/src/pocong/__init__.py +0 -0
- {pocong-1.0.0 → pocong-1.0.1}/src/pocong/cli.py +0 -0
- {pocong-1.0.0 → pocong-1.0.1}/src/pocong/pocong.py +0 -0
- {pocong-1.0.0 → pocong-1.0.1}/src/pocong/proxy_spiders/pipelines.py +0 -0
- {pocong-1.0.0 → pocong-1.0.1}/src/pocong/proxy_spiders/spiders/__init__.py +0 -0
- {pocong-1.0.0 → pocong-1.0.1}/src/pocong/proxy_spiders/spiders/free_proxy_list_net_spider.py +0 -0
- {pocong-1.0.0 → pocong-1.0.1}/src/pocong/utils.py +0 -0
- {pocong-1.0.0 → pocong-1.0.1}/src/pocong.egg-info/SOURCES.txt +0 -0
- {pocong-1.0.0 → pocong-1.0.1}/src/pocong.egg-info/dependency_links.txt +0 -0
- {pocong-1.0.0 → pocong-1.0.1}/src/pocong.egg-info/entry_points.txt +0 -0
- {pocong-1.0.0 → pocong-1.0.1}/src/pocong.egg-info/requires.txt +0 -0
- {pocong-1.0.0 → pocong-1.0.1}/src/pocong.egg-info/top_level.txt +0 -0
- {pocong-1.0.0 → pocong-1.0.1}/tests/test_pocong.py +0 -0
- {pocong-1.0.0 → pocong-1.0.1}/tests/test_proxy_spiders.py +0 -0
- {pocong-1.0.0 → pocong-1.0.1}/versioneer.py +0 -0
@@ -8,11 +8,11 @@ import json
|
|
8
8
|
|
9
9
|
version_json = '''
|
10
10
|
{
|
11
|
-
"date": "2025-09-
|
11
|
+
"date": "2025-09-21T22:32:32+0700",
|
12
12
|
"dirty": false,
|
13
13
|
"error": null,
|
14
|
-
"full-revisionid": "
|
15
|
-
"version": "1.0.0"
|
14
|
+
"full-revisionid": "85eac04a9a369e96c0c4819feecca0cc5d23c173",
|
15
|
+
"version": "1.0.1"
|
16
16
|
}
|
17
17
|
''' # END VERSION_JSON
|
18
18
|
|
@@ -2,10 +2,6 @@
|
|
2
2
|
import random
|
3
3
|
|
4
4
|
import requests
|
5
|
-
import pandas as pd
|
6
|
-
from scrapy.crawler import CrawlerProcess
|
7
|
-
|
8
|
-
from pocong.proxy_spiders.spiders.free_proxy_list_net_spider import ProxySpider
|
9
5
|
|
10
6
|
|
11
7
|
class GetProxy():
|
@@ -13,37 +9,39 @@ class GetProxy():
|
|
13
9
|
Class to get proxies using Scrapy spiders and validate them.
|
14
10
|
'''
|
15
11
|
def __init__(self):
|
16
|
-
|
12
|
+
self._items = []
|
17
13
|
|
18
14
|
def _check_proxy(self, x):
|
19
15
|
proxy = f"http://{x['ip']}:{x['port']}"
|
20
16
|
try:
|
21
17
|
response = requests.get("https://httpbin.org/ip", proxies={'https': proxy}, timeout=10)
|
22
18
|
if response.status_code == 200 and response.json().get('origin') == x['ip']:
|
23
|
-
print(f"checking proxy: {proxy} success") # noqa
|
24
19
|
return response.status_code
|
25
|
-
print(f"checking proxy: {proxy} failed") # noqa
|
26
20
|
return 0
|
27
21
|
except requests.RequestException:
|
28
|
-
print(f"checking proxy: {proxy} failed") # noqa
|
29
22
|
return 0
|
30
23
|
|
31
|
-
def _run_example_spider(self):
|
32
|
-
process = CrawlerProcess(settings={
|
33
|
-
"LOG_LEVEL": "ERROR",
|
34
|
-
"ITEM_PIPELINES": {'pocong.proxy_spiders.pipelines.Pipelines': 1},
|
35
|
-
})
|
36
|
-
process.crawl(ProxySpider)
|
37
|
-
process.start()
|
38
|
-
from pocong.proxy_spiders.pipelines import collected_items
|
39
|
-
return collected_items
|
40
|
-
|
41
24
|
def _get_proxy_from_scrape(self):
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
25
|
+
import subprocess
|
26
|
+
import sys
|
27
|
+
import json
|
28
|
+
# Run the spider in a subprocess to avoid reactor restart error
|
29
|
+
code = (
|
30
|
+
'import pandas as pd;'
|
31
|
+
'from scrapy.crawler import CrawlerProcess;'
|
32
|
+
'from pocong.proxy_spiders.spiders.free_proxy_list_net_spider import ProxySpider;'
|
33
|
+
'from pocong.proxy_spiders.pipelines import collected_items;'
|
34
|
+
'process = CrawlerProcess(settings={"LOG_LEVEL": "ERROR", "ITEM_PIPELINES": {"pocong.proxy_spiders.pipelines.Pipelines": 1}});' # noqa: E501
|
35
|
+
'process.crawl(ProxySpider);'
|
36
|
+
'process.start();'
|
37
|
+
'process.stop();'
|
38
|
+
'df = pd.DataFrame(collected_items);'
|
39
|
+
'df = df[df["https"] == "yes"];'
|
40
|
+
'df = df.drop_duplicates(subset=["ip", "port"]);'
|
41
|
+
'print(df.to_json(orient="records"))'
|
42
|
+
)
|
43
|
+
result = subprocess.run([sys.executable, '-c', code], capture_output=True, text=True)
|
44
|
+
proxies_json = json.loads(result.stdout.strip()) if result.stdout.strip() else []
|
47
45
|
return proxies_json
|
48
46
|
|
49
47
|
def get_proxy(self):
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{pocong-1.0.0 → pocong-1.0.1}/src/pocong/proxy_spiders/spiders/free_proxy_list_net_spider.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|