pocong 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pocong/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2025-09-21T17:59:23+0700",
11
+ "date": "2025-09-21T22:32:32+0700",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "9a2d0f58b89a546044fc52948ce274767aa450d4",
15
- "version": "1.0.0"
14
+ "full-revisionid": "85eac04a9a369e96c0c4819feecca0cc5d23c173",
15
+ "version": "1.0.1"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -2,10 +2,6 @@
2
2
  import random
3
3
 
4
4
  import requests
5
- import pandas as pd
6
- from scrapy.crawler import CrawlerProcess
7
-
8
- from pocong.proxy_spiders.spiders.free_proxy_list_net_spider import ProxySpider
9
5
 
10
6
 
11
7
  class GetProxy():
@@ -13,37 +9,39 @@ class GetProxy():
13
9
  Class to get proxies using Scrapy spiders and validate them.
14
10
  '''
15
11
  def __init__(self):
16
- pass
12
+ self._items = []
17
13
 
18
14
  def _check_proxy(self, x):
19
15
  proxy = f"http://{x['ip']}:{x['port']}"
20
16
  try:
21
17
  response = requests.get("https://httpbin.org/ip", proxies={'https': proxy}, timeout=10)
22
18
  if response.status_code == 200 and response.json().get('origin') == x['ip']:
23
- print(f"checking proxy: {proxy} success") # noqa
24
19
  return response.status_code
25
- print(f"checking proxy: {proxy} failed") # noqa
26
20
  return 0
27
21
  except requests.RequestException:
28
- print(f"checking proxy: {proxy} failed") # noqa
29
22
  return 0
30
23
 
31
- def _run_example_spider(self):
32
- process = CrawlerProcess(settings={
33
- "LOG_LEVEL": "ERROR",
34
- "ITEM_PIPELINES": {'pocong.proxy_spiders.pipelines.Pipelines': 1},
35
- })
36
- process.crawl(ProxySpider)
37
- process.start()
38
- from pocong.proxy_spiders.pipelines import collected_items
39
- return collected_items
40
-
41
24
  def _get_proxy_from_scrape(self):
42
- items = self._run_example_spider()
43
- df = pd.DataFrame(items)
44
- df = df[df['https'] == 'yes']
45
- df = df.drop_duplicates(subset=['ip', 'port'])
46
- proxies_json = df.to_dict(orient='records')
25
+ import subprocess
26
+ import sys
27
+ import json
28
+ # Run the spider in a subprocess to avoid reactor restart error
29
+ code = (
30
+ 'import pandas as pd;'
31
+ 'from scrapy.crawler import CrawlerProcess;'
32
+ 'from pocong.proxy_spiders.spiders.free_proxy_list_net_spider import ProxySpider;'
33
+ 'from pocong.proxy_spiders.pipelines import collected_items;'
34
+ 'process = CrawlerProcess(settings={"LOG_LEVEL": "ERROR", "ITEM_PIPELINES": {"pocong.proxy_spiders.pipelines.Pipelines": 1}});' # noqa: E501
35
+ 'process.crawl(ProxySpider);'
36
+ 'process.start();'
37
+ 'process.stop();'
38
+ 'df = pd.DataFrame(collected_items);'
39
+ 'df = df[df["https"] == "yes"];'
40
+ 'df = df.drop_duplicates(subset=["ip", "port"]);'
41
+ 'print(df.to_json(orient="records"))'
42
+ )
43
+ result = subprocess.run([sys.executable, '-c', code], capture_output=True, text=True)
44
+ proxies_json = json.loads(result.stdout.strip()) if result.stdout.strip() else []
47
45
  return proxies_json
48
46
 
49
47
  def get_proxy(self):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pocong
3
- Version: 1.0.0
3
+ Version: 1.0.1
4
4
  Summary: Python Oriented Crawling Ongoing (POCONG): a simple crawling framework
5
5
  Home-page: https://gitlab.com/mohsin3107/pocong
6
6
  Author: Singgih
@@ -1,14 +1,14 @@
1
1
  pocong/__init__.py,sha256=Hl0PkSkg6LV6IRLzXnGc0K2GY-drxkZEpt5qTAVDUkY,109
2
- pocong/_version.py,sha256=hFgXEoBMIBnyj4LP_PTO6GNR5nrvefsXEARVrbSCj7o,497
2
+ pocong/_version.py,sha256=t_ZIUHG9ovHL3x_o5mb9F_Ih_UhgDKE9j9yI8c92xyk,497
3
3
  pocong/cli.py,sha256=_f_aU4pckbQ_baF9oHwbqwmBFiQFn5Irvi-v5rDZ70o,529
4
4
  pocong/pocong.py,sha256=h0hwdogXGFqerm-5ZPeT-irPn91pCcQRjiHThXsRzEk,19
5
5
  pocong/utils.py,sha256=MAbbL9PXRWnBpJKgI869ZfY42Eph73zcbJyK0jH2Nak,35
6
- pocong/proxy_spiders/__init__.py,sha256=q3ifQZd4_TipTmVYklCdvgVZEkuLR91Qo0LwM4CBMnA,2384
6
+ pocong/proxy_spiders/__init__.py,sha256=RBlvqba1wIhCJPn4n5LNB3SLiCQHDcGOhtrFydWY7T4,2556
7
7
  pocong/proxy_spiders/pipelines.py,sha256=k8DRupjvN7qnIk0uFNJ_3JEFlDadtO0PCBH0iOsPKp4,145
8
8
  pocong/proxy_spiders/spiders/__init__.py,sha256=4-oTTycftRXl_6z92SjSi_XmDfP-1xAaVj39HMggLWc,52
9
9
  pocong/proxy_spiders/spiders/free_proxy_list_net_spider.py,sha256=AV-8_KF7UMRkdcuCaqdhGtbsMawpJt9G3NF6S7aVQO4,886
10
- pocong-1.0.0.dist-info/METADATA,sha256=2A3QXiD-vF7rgHM6sN4Yt9-O8KbRm_5DGaKGfCaKMjw,3411
11
- pocong-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
12
- pocong-1.0.0.dist-info/entry_points.txt,sha256=Q3F4OQIZJzlnS2tnEuTzcn2tN4S5Btd08o_9Otdb4bM,43
13
- pocong-1.0.0.dist-info/top_level.txt,sha256=ZMo2AlCPGpM4N7hHVSNoIjbM1D90yjFhRra0YmCfTO4,7
14
- pocong-1.0.0.dist-info/RECORD,,
10
+ pocong-1.0.1.dist-info/METADATA,sha256=-t70ukwfphmnul9khbMmbYyIOIowevrC9eriXZrv5so,3411
11
+ pocong-1.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
12
+ pocong-1.0.1.dist-info/entry_points.txt,sha256=Q3F4OQIZJzlnS2tnEuTzcn2tN4S5Btd08o_9Otdb4bM,43
13
+ pocong-1.0.1.dist-info/top_level.txt,sha256=ZMo2AlCPGpM4N7hHVSNoIjbM1D90yjFhRra0YmCfTO4,7
14
+ pocong-1.0.1.dist-info/RECORD,,
File without changes