fb_scraper_request 0.2.1__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fb_scraper_request-0.2.3/PKG-INFO +122 -0
- fb_scraper_request-0.2.3/README.md +91 -0
- fb_scraper_request-0.2.3/fb_scraper_request/example.py +33 -0
- {fb_scraper_request-0.2.1 → fb_scraper_request-0.2.3}/fb_scraper_request/facebook_graphql_scraper.py +84 -36
- fb_scraper_request-0.2.3/fb_scraper_request.egg-info/PKG-INFO +122 -0
- {fb_scraper_request-0.2.1 → fb_scraper_request-0.2.3}/fb_scraper_request.egg-info/SOURCES.txt +0 -1
- {fb_scraper_request-0.2.1 → fb_scraper_request-0.2.3}/fb_scraper_request.egg-info/requires.txt +0 -1
- {fb_scraper_request-0.2.1 → fb_scraper_request-0.2.3}/pyproject.toml +4 -5
- fb_scraper_request-0.2.1/PKG-INFO +0 -55
- fb_scraper_request-0.2.1/README.md +0 -18
- fb_scraper_request-0.2.1/fb_scraper_request/example.py +0 -26
- fb_scraper_request-0.2.1/fb_scraper_request.egg-info/PKG-INFO +0 -55
- fb_scraper_request-0.2.1/setup.py +0 -28
- {fb_scraper_request-0.2.1 → fb_scraper_request-0.2.3}/LICENSE +0 -0
- {fb_scraper_request-0.2.1 → fb_scraper_request-0.2.3}/fb_scraper_request/__init__.py +0 -0
- {fb_scraper_request-0.2.1 → fb_scraper_request-0.2.3}/fb_scraper_request.egg-info/dependency_links.txt +0 -0
- {fb_scraper_request-0.2.1 → fb_scraper_request-0.2.3}/fb_scraper_request.egg-info/top_level.txt +0 -0
- {fb_scraper_request-0.2.1 → fb_scraper_request-0.2.3}/setup.cfg +0 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fb_scraper_request
|
|
3
|
+
Version: 0.2.3
|
|
4
|
+
Summary: Facebook GraphQL Scraper - No login required, simple API to scrape public Facebook posts
|
|
5
|
+
Author-email: Nguyen Minh Quang <quangforwork1203@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/DOCUTEE/fb_crawl_request
|
|
8
|
+
Project-URL: Repository, https://github.com/DOCUTEE/fb_crawl_request
|
|
9
|
+
Project-URL: Issues, https://github.com/DOCUTEE/fb_crawl_request/issues
|
|
10
|
+
Keywords: facebook,scraper,graphql,social-media,crawler
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: requests>=2.28.0
|
|
25
|
+
Requires-Dist: pytz
|
|
26
|
+
Requires-Dist: pip>=26.0.1
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: build>=0.8.0; extra == "dev"
|
|
29
|
+
Requires-Dist: twine>=4.0.0; extra == "dev"
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# fb_scraper_request
|
|
33
|
+
|
|
34
|
+
Scrape public Facebook posts without login using a simple, requests-based scraper.
|
|
35
|
+
|
|
36
|
+
This project is a streamlined fork developed from the foundational work at [FaustRen/facebook-graphql-scraper](https://github.com/FaustRen/facebook-graphql-scraper).
|
|
37
|
+
|
|
38
|
+
## Improvements
|
|
39
|
+
- **Lightweight:** Uses only `requests` to fetch user posts—no browser, Selenium, or Playwright required.
|
|
40
|
+
- **Focused:** Removed login-dependent features to focus exclusively on public page scraping.
|
|
41
|
+
- **Efficient:** Optimized for speed and minimal dependency overhead.
|
|
42
|
+
|
|
43
|
+
## Install
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
pip install fb_scraper_request
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Quick Example
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from fb_scraper_request import FacebookGraphqlScraper
|
|
53
|
+
|
|
54
|
+
# Initialize the scraper
|
|
55
|
+
fb = FacebookGraphqlScraper()
|
|
56
|
+
|
|
57
|
+
# Get posts from a specific username with a day limit
|
|
58
|
+
result = fb.get_user_posts("honghotduongpho.official00", days_limit=3)
|
|
59
|
+
|
|
60
|
+
for post in result["data"]:
|
|
61
|
+
print(f"Content: {post['context']}")
|
|
62
|
+
print(f"Likes: {post['reaction_count.count']}")
|
|
63
|
+
print("-" * 20)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Results
|
|
67
|
+
`get_user_posts` returns a dictionary containing the requested user ID, the profile info, a list of post records (`data`), and the raw API responses (`raw_data`):
|
|
68
|
+
```json
|
|
69
|
+
{
|
|
70
|
+
"fb_username_or_userid": "100063640556423",
|
|
71
|
+
"profile": [
|
|
72
|
+
"Life at VNG | Ho Chi Minh City"
|
|
73
|
+
],
|
|
74
|
+
"data": [
|
|
75
|
+
{
|
|
76
|
+
"post_id": "1601334011997935",
|
|
77
|
+
"post_url": "https://www.facebook.com/1601334011997935",
|
|
78
|
+
"username_or_userid": "100063640556423",
|
|
79
|
+
"owing_profile": {
|
|
80
|
+
"__typename": "User",
|
|
81
|
+
"name": "Life at VNG",
|
|
82
|
+
"short_name": "Life at VNG",
|
|
83
|
+
"id": "100063640556423"
|
|
84
|
+
},
|
|
85
|
+
"published_date": "2026-03-28T11:58:04",
|
|
86
|
+
"published_date2": "2026-03-28",
|
|
87
|
+
"time": 1774673884,
|
|
88
|
+
"reaction_count.count": 16,
|
|
89
|
+
"comment_rendering_instance.comments.total_count": null,
|
|
90
|
+
"share_count.count": null,
|
|
91
|
+
"sub_reactions": {
|
|
92
|
+
"Thích": 10,
|
|
93
|
+
"Yêu thích": 5,
|
|
94
|
+
"Wow": 1
|
|
95
|
+
},
|
|
96
|
+
"context": "[HÀ NỘI] BUSINESS DEVELOPMENT FRESHER 2026 “BẬT ĐỊNH VỊ” HẸN GẶP SINH VIÊN THỦ ĐÔ 📍\n✨ Ứng tuyển Business Development Fresher 2026 tại: https://bit.ly/4rRnqaG \n\nBusiness Development Fresher 2026 (BDF 2026) - chương trình tuyển chọn và phát triển thế hệ Business Development tiềm năng của VNG ZingPlay Game Studios đã sẵn sàng gặp gỡ và giao lưu cùng các bạn sinh viên Hà Nội.\n\n👉 Nếu bạn đam mê khám phá thị trường Game,...",
|
|
97
|
+
"video_view_count": null
|
|
98
|
+
},
|
|
99
|
+
...
|
|
100
|
+
],
|
|
101
|
+
"raw_data": [
|
|
102
|
+
<raw_facebook_meta_response>
|
|
103
|
+
]
|
|
104
|
+
}
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Extracting Additional Data
|
|
108
|
+
|
|
109
|
+
> **Note:** If you need more data fields than what's provided in `data`, you can manually extract additional information from `raw_data`. This contains the complete raw Facebook GraphQL API responses.
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
result = fb.get_user_posts("page_username", days_limit=3)
|
|
113
|
+
|
|
114
|
+
# Access raw API responses for custom data extraction
|
|
115
|
+
for raw_response in result["raw_data"]:
|
|
116
|
+
# Extract any custom fields you need
|
|
117
|
+
custom_field = raw_response.get("your_custom_field")
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## Credits
|
|
121
|
+
|
|
122
|
+
Thank you to the original project owner [FaustRen](https://github.com/FaustRen) and the [facebook-graphql-scraper](https://github.com/FaustRen/facebook-graphql-scraper) repository for the foundational work that made this simplified version possible.
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# fb_scraper_request
|
|
2
|
+
|
|
3
|
+
Scrape public Facebook posts without login using a simple, requests-based scraper.
|
|
4
|
+
|
|
5
|
+
This project is a streamlined fork developed from the foundational work at [FaustRen/facebook-graphql-scraper](https://github.com/FaustRen/facebook-graphql-scraper).
|
|
6
|
+
|
|
7
|
+
## Improvements
|
|
8
|
+
- **Lightweight:** Uses only `requests` to fetch user posts—no browser, Selenium, or Playwright required.
|
|
9
|
+
- **Focused:** Removed login-dependent features to focus exclusively on public page scraping.
|
|
10
|
+
- **Efficient:** Optimized for speed and minimal dependency overhead.
|
|
11
|
+
|
|
12
|
+
## Install
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
pip install fb_scraper_request
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## Quick Example
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
from fb_scraper_request import FacebookGraphqlScraper
|
|
22
|
+
|
|
23
|
+
# Initialize the scraper
|
|
24
|
+
fb = FacebookGraphqlScraper()
|
|
25
|
+
|
|
26
|
+
# Get posts from a specific username with a day limit
|
|
27
|
+
result = fb.get_user_posts("honghotduongpho.official00", days_limit=3)
|
|
28
|
+
|
|
29
|
+
for post in result["data"]:
|
|
30
|
+
print(f"Content: {post['context']}")
|
|
31
|
+
print(f"Likes: {post['reaction_count.count']}")
|
|
32
|
+
print("-" * 20)
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Results
|
|
36
|
+
`get_user_posts` returns a dictionary containing the requested user ID, the profile info, a list of post records (`data`), and the raw API responses (`raw_data`):
|
|
37
|
+
```json
|
|
38
|
+
{
|
|
39
|
+
"fb_username_or_userid": "100063640556423",
|
|
40
|
+
"profile": [
|
|
41
|
+
"Life at VNG | Ho Chi Minh City"
|
|
42
|
+
],
|
|
43
|
+
"data": [
|
|
44
|
+
{
|
|
45
|
+
"post_id": "1601334011997935",
|
|
46
|
+
"post_url": "https://www.facebook.com/1601334011997935",
|
|
47
|
+
"username_or_userid": "100063640556423",
|
|
48
|
+
"owing_profile": {
|
|
49
|
+
"__typename": "User",
|
|
50
|
+
"name": "Life at VNG",
|
|
51
|
+
"short_name": "Life at VNG",
|
|
52
|
+
"id": "100063640556423"
|
|
53
|
+
},
|
|
54
|
+
"published_date": "2026-03-28T11:58:04",
|
|
55
|
+
"published_date2": "2026-03-28",
|
|
56
|
+
"time": 1774673884,
|
|
57
|
+
"reaction_count.count": 16,
|
|
58
|
+
"comment_rendering_instance.comments.total_count": null,
|
|
59
|
+
"share_count.count": null,
|
|
60
|
+
"sub_reactions": {
|
|
61
|
+
"Thích": 10,
|
|
62
|
+
"Yêu thích": 5,
|
|
63
|
+
"Wow": 1
|
|
64
|
+
},
|
|
65
|
+
"context": "[HÀ NỘI] BUSINESS DEVELOPMENT FRESHER 2026 “BẬT ĐỊNH VỊ” HẸN GẶP SINH VIÊN THỦ ĐÔ 📍\n✨ Ứng tuyển Business Development Fresher 2026 tại: https://bit.ly/4rRnqaG \n\nBusiness Development Fresher 2026 (BDF 2026) - chương trình tuyển chọn và phát triển thế hệ Business Development tiềm năng của VNG ZingPlay Game Studios đã sẵn sàng gặp gỡ và giao lưu cùng các bạn sinh viên Hà Nội.\n\n👉 Nếu bạn đam mê khám phá thị trường Game,...",
|
|
66
|
+
"video_view_count": null
|
|
67
|
+
},
|
|
68
|
+
...
|
|
69
|
+
],
|
|
70
|
+
"raw_data": [
|
|
71
|
+
<raw_facebook_meta_response>
|
|
72
|
+
]
|
|
73
|
+
}
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Extracting Additional Data
|
|
77
|
+
|
|
78
|
+
> **Note:** If you need more data fields than what's provided in `data`, you can manually extract additional information from `raw_data`. This contains the complete raw Facebook GraphQL API responses.
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
result = fb.get_user_posts("page_username", days_limit=3)
|
|
82
|
+
|
|
83
|
+
# Access raw API responses for custom data extraction
|
|
84
|
+
for raw_response in result["raw_data"]:
|
|
85
|
+
# Extract any custom fields you need
|
|
86
|
+
custom_field = raw_response.get("your_custom_field")
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Credits
|
|
90
|
+
|
|
91
|
+
Thank you to the original project owner [FaustRen](https://github.com/FaustRen) and the [facebook-graphql-scraper](https://github.com/FaustRen/facebook-graphql-scraper) repository for the foundational work that made this simplified version possible.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
from fb_scraper_request.facebook_graphql_scraper import (
|
|
3
|
+
FacebookGraphqlScraper as fb_graphql_scraper,
|
|
4
|
+
)
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
## Example.1 - without logging in
|
|
8
|
+
if __name__ == "__main__":
|
|
9
|
+
facebook_user_name = "love.yuweishao"
|
|
10
|
+
facebook_user_id = "100044253168423"
|
|
11
|
+
days_limit = 100 # Number of days within which to scrape posts
|
|
12
|
+
driver_path = (
|
|
13
|
+
"/Users/hongshangren/Downloads/chromedriver-mac-arm64_136/chromedriver"
|
|
14
|
+
)
|
|
15
|
+
fb_spider = fb_graphql_scraper(driver_path=driver_path, open_browser=False)
|
|
16
|
+
res = fb_spider.get_user_posts(
|
|
17
|
+
fb_username_or_userid=facebook_user_id,
|
|
18
|
+
days_limit=days_limit,
|
|
19
|
+
display_progress=True,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
## Example.2 - log in to your Facebook account to collect data
|
|
24
|
+
# if __name__ == "__main__":
|
|
25
|
+
# facebook_user_name = "love.yuweishao"
|
|
26
|
+
# facebook_user_id = "100044253168423"
|
|
27
|
+
# fb_account = "facebook_account"
|
|
28
|
+
# fb_pwd = "facebook_password"
|
|
29
|
+
# days_limit = 30 # Number of days within which to scrape posts
|
|
30
|
+
# driver_path = "/Users/hongshangren/Downloads/chromedriver-mac-arm64_136/chromedriver"
|
|
31
|
+
# fb_spider = fb_graphql_scraper(fb_account=fb_account,fb_pwd=fb_pwd, driver_path=driver_path, open_browser=False)
|
|
32
|
+
# res = fb_spider.get_user_posts(fb_username_or_userid=facebook_user_name, days_limit=days_limit,display_progress=True)
|
|
33
|
+
# print(res)
|
{fb_scraper_request-0.2.1 → fb_scraper_request-0.2.3}/fb_scraper_request/facebook_graphql_scraper.py
RENAMED
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
import re
|
|
3
|
-
import pandas as pd
|
|
4
3
|
import requests
|
|
5
|
-
from
|
|
6
|
-
from
|
|
7
|
-
from
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from fb_scraper_request.utils.parser import RequestsParser
|
|
6
|
+
from fb_scraper_request.utils.locator import *
|
|
7
|
+
from fb_scraper_request.utils.utils import *
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class FacebookSettings:
|
|
11
11
|
"""Facebook GraphQL Scraper - No login required
|
|
12
12
|
|
|
13
13
|
Example:
|
|
14
|
-
from
|
|
14
|
+
from fb_scraper_request.facebook_graphql_scraper import FacebookGraphqlScraper as fb_graphql_scraper
|
|
15
15
|
|
|
16
16
|
if __name__ == "__main__":
|
|
17
17
|
facebook_user_name = "love.yuweishao"
|
|
@@ -117,42 +117,90 @@ class FacebookGraphqlScraper(FacebookSettings):
|
|
|
117
117
|
return username, profile_feed
|
|
118
118
|
|
|
119
119
|
def format_data(self, res_in, fb_username_or_userid, new_reactions):
    """Turn parsed post records into the flat, de-duplicated result list.

    Args:
        res_in: Iterable of per-post dicts (possibly nested). Each one is
            flattened with ``self._flatten_dict`` before fields are read.
        fb_username_or_userid: Identifier copied into every record under
            ``"username_or_userid"``.
        new_reactions: Per-post reaction breakdowns, matched to ``res_in``
            by position. Posts without an entry get ``{}``.

    Returns:
        A list of dicts, one per distinct ``post_id`` (the first occurrence
        wins), in input order. Each dict has the keys ``post_id``,
        ``post_url``, ``username_or_userid``, ``owing_profile``,
        ``published_date``, ``published_date2``, ``time``,
        ``reaction_count.count``,
        ``comment_rendering_instance.comments.total_count``,
        ``share_count.count``, ``sub_reactions``, ``context`` and
        ``video_view_count``.
    """
    parser = self.requests_parser

    def at(seq, index, default):
        """Return ``seq[index]``, or ``default`` when ``seq`` is too short."""
        return seq[index] if index < len(seq) else default

    def metric(flat, dotted_key):
        """Read a counter from a nested dict or from its flattened key.

        ``_flatten_dict`` collapses nested dicts into dotted keys, so after
        flattening the value normally lives under ``dotted_key`` itself.
        Looking only at the nested form (as the share and comment counters
        used to) therefore always produced ``None``.
        """
        root, *rest = dotted_key.split(".")
        node = flat.get(root)
        if not isinstance(node, dict):
            return flat.get(dotted_key)
        for part in rest:
            node = node.get(part) if isinstance(node, dict) else None
        return node

    seen_post_ids = set()
    unique_posts = []
    for i, post in enumerate(res_in):
        flat_post = self._flatten_dict(post)

        # Keep only the first record for each post_id.
        post_id = flat_post.get("post_id")
        if post_id in seen_post_ids:
            continue
        seen_post_ids.add(post_id)

        # Positional side lists may be shorter than res_in; fall back to
        # neutral defaults instead of raising IndexError.
        time_val = at(parser.creation_list, i, 0)
        # No tz argument: the timestamp is rendered in the machine's local
        # timezone as a naive datetime.
        dt = datetime.fromtimestamp(time_val)

        unique_posts.append(
            {
                "post_id": post_id,
                # A missing or None post_id yields the bare base URL rather
                # than a TypeError from str + None.
                "post_url": "https://www.facebook.com/"
                + ("" if post_id is None else str(post_id)),
                "username_or_userid": fb_username_or_userid,
                "owing_profile": at(parser.owning_profile, i, {}),
                "published_date": dt.isoformat(),
                "published_date2": dt.strftime("%Y-%m-%d"),
                "time": time_val,
                "reaction_count.count": metric(
                    flat_post, "reaction_count.count"
                ),
                "comment_rendering_instance.comments.total_count": metric(
                    flat_post,
                    "comment_rendering_instance.comments.total_count",
                ),
                "share_count.count": metric(flat_post, "share_count.count"),
                "sub_reactions": at(new_reactions, i, {}),
                "context": at(parser.context_list, i, ""),
                "video_view_count": flat_post.get("video_view_count"),
            }
        )
    return unique_posts
|
|
155
192
|
|
|
193
|
+
def _flatten_dict(self, d, parent_key="", sep="."):
    """Collapse a nested dict into a single level with joined keys.

    Args:
        d: Dictionary to flatten; nested dict values are expanded
            recursively.
        parent_key: Prefix for every key produced at this level. When it is
            falsy, keys are kept exactly as they appear in ``d``.
        sep: Separator placed between the prefix and each key.

    Returns:
        A new dict whose values are the non-dict leaves of ``d``. Empty
        nested dicts contribute no entries.
    """
    flattened = {}
    for key, value in d.items():
        # Top-level keys are kept as-is; nested keys are joined to their path.
        path = key if not parent_key else f"{parent_key}{sep}{key}"
        if not isinstance(value, dict):
            flattened[path] = value
            continue
        flattened.update(self._flatten_dict(value, path, sep))
    return flattened
|
|
203
|
+
|
|
156
204
|
def process_reactions(self, res_in):
|
|
157
205
|
reactions_out = []
|
|
158
206
|
for each_res in res_in:
|
|
@@ -238,7 +286,7 @@ class FacebookGraphqlScraper(FacebookSettings):
|
|
|
238
286
|
id_in=fb_username_or_userid,
|
|
239
287
|
before_time=before_time, # input before_time
|
|
240
288
|
)
|
|
241
|
-
print("playload_in:", payload_in)
|
|
289
|
+
# print("playload_in:", payload_in)
|
|
242
290
|
response = requests.post(
|
|
243
291
|
url=url,
|
|
244
292
|
data=payload_in,
|
|
@@ -246,7 +294,7 @@ class FacebookGraphqlScraper(FacebookSettings):
|
|
|
246
294
|
data = response.content
|
|
247
295
|
decoded_data = data.decode("utf-8")
|
|
248
296
|
body_content = decoded_data.split("\n")
|
|
249
|
-
print(body_content[:5])
|
|
297
|
+
# print(body_content[:5])
|
|
250
298
|
self.requests_parser.parse_body(body_content=body_content)
|
|
251
299
|
is_first_time = False
|
|
252
300
|
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fb_scraper_request
|
|
3
|
+
Version: 0.2.3
|
|
4
|
+
Summary: Facebook GraphQL Scraper - No login required, simple API to scrape public Facebook posts
|
|
5
|
+
Author-email: Nguyen Minh Quang <quangforwork1203@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/DOCUTEE/fb_crawl_request
|
|
8
|
+
Project-URL: Repository, https://github.com/DOCUTEE/fb_crawl_request
|
|
9
|
+
Project-URL: Issues, https://github.com/DOCUTEE/fb_crawl_request/issues
|
|
10
|
+
Keywords: facebook,scraper,graphql,social-media,crawler
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: requests>=2.28.0
|
|
25
|
+
Requires-Dist: pytz
|
|
26
|
+
Requires-Dist: pip>=26.0.1
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: build>=0.8.0; extra == "dev"
|
|
29
|
+
Requires-Dist: twine>=4.0.0; extra == "dev"
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# fb_scraper_request
|
|
33
|
+
|
|
34
|
+
Scrape public Facebook posts without login using a simple, requests-based scraper.
|
|
35
|
+
|
|
36
|
+
This project is a streamlined fork developed from the foundational work at [FaustRen/facebook-graphql-scraper](https://github.com/FaustRen/facebook-graphql-scraper).
|
|
37
|
+
|
|
38
|
+
## Improvements
|
|
39
|
+
- **Lightweight:** Uses only `requests` to fetch user posts—no browser, Selenium, or Playwright required.
|
|
40
|
+
- **Focused:** Removed login-dependent features to focus exclusively on public page scraping.
|
|
41
|
+
- **Efficient:** Optimized for speed and minimal dependency overhead.
|
|
42
|
+
|
|
43
|
+
## Install
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
pip install fb_scraper_request
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Quick Example
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from fb_scraper_request import FacebookGraphqlScraper
|
|
53
|
+
|
|
54
|
+
# Initialize the scraper
|
|
55
|
+
fb = FacebookGraphqlScraper()
|
|
56
|
+
|
|
57
|
+
# Get posts from a specific username with a day limit
|
|
58
|
+
result = fb.get_user_posts("honghotduongpho.official00", days_limit=3)
|
|
59
|
+
|
|
60
|
+
for post in result["data"]:
|
|
61
|
+
print(f"Content: {post['context']}")
|
|
62
|
+
print(f"Likes: {post['reaction_count.count']}")
|
|
63
|
+
print("-" * 20)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Results
|
|
67
|
+
`get_user_posts` returns a dictionary containing the requested user ID, the profile info, a list of post records (`data`), and the raw API responses (`raw_data`):
|
|
68
|
+
```json
|
|
69
|
+
{
|
|
70
|
+
"fb_username_or_userid": "100063640556423",
|
|
71
|
+
"profile": [
|
|
72
|
+
"Life at VNG | Ho Chi Minh City"
|
|
73
|
+
],
|
|
74
|
+
"data": [
|
|
75
|
+
{
|
|
76
|
+
"post_id": "1601334011997935",
|
|
77
|
+
"post_url": "https://www.facebook.com/1601334011997935",
|
|
78
|
+
"username_or_userid": "100063640556423",
|
|
79
|
+
"owing_profile": {
|
|
80
|
+
"__typename": "User",
|
|
81
|
+
"name": "Life at VNG",
|
|
82
|
+
"short_name": "Life at VNG",
|
|
83
|
+
"id": "100063640556423"
|
|
84
|
+
},
|
|
85
|
+
"published_date": "2026-03-28T11:58:04",
|
|
86
|
+
"published_date2": "2026-03-28",
|
|
87
|
+
"time": 1774673884,
|
|
88
|
+
"reaction_count.count": 16,
|
|
89
|
+
"comment_rendering_instance.comments.total_count": null,
|
|
90
|
+
"share_count.count": null,
|
|
91
|
+
"sub_reactions": {
|
|
92
|
+
"Thích": 10,
|
|
93
|
+
"Yêu thích": 5,
|
|
94
|
+
"Wow": 1
|
|
95
|
+
},
|
|
96
|
+
"context": "[HÀ NỘI] BUSINESS DEVELOPMENT FRESHER 2026 “BẬT ĐỊNH VỊ” HẸN GẶP SINH VIÊN THỦ ĐÔ 📍\n✨ Ứng tuyển Business Development Fresher 2026 tại: https://bit.ly/4rRnqaG \n\nBusiness Development Fresher 2026 (BDF 2026) - chương trình tuyển chọn và phát triển thế hệ Business Development tiềm năng của VNG ZingPlay Game Studios đã sẵn sàng gặp gỡ và giao lưu cùng các bạn sinh viên Hà Nội.\n\n👉 Nếu bạn đam mê khám phá thị trường Game,...",
|
|
97
|
+
"video_view_count": null
|
|
98
|
+
},
|
|
99
|
+
...
|
|
100
|
+
],
|
|
101
|
+
"raw_data": [
|
|
102
|
+
<raw_facebook_meta_response>
|
|
103
|
+
]
|
|
104
|
+
}
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Extracting Additional Data
|
|
108
|
+
|
|
109
|
+
> **Note:** If you need more data fields than what's provided in `data`, you can manually extract additional information from `raw_data`. This contains the complete raw Facebook GraphQL API responses.
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
result = fb.get_user_posts("page_username", days_limit=3)
|
|
113
|
+
|
|
114
|
+
# Access raw API responses for custom data extraction
|
|
115
|
+
for raw_response in result["raw_data"]:
|
|
116
|
+
# Extract any custom fields you need
|
|
117
|
+
custom_field = raw_response.get("your_custom_field")
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## Credits
|
|
121
|
+
|
|
122
|
+
Thank you to the original project owner [FaustRen](https://github.com/FaustRen) and the [facebook-graphql-scraper](https://github.com/FaustRen/facebook-graphql-scraper) repository for the foundational work that made this simplified version possible.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "fb_scraper_request"
|
|
3
|
-
version = "0.2.
|
|
3
|
+
version = "0.2.3"
|
|
4
4
|
description = "Facebook GraphQL Scraper - No login required, simple API to scrape public Facebook posts"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.9"
|
|
@@ -22,7 +22,6 @@ classifiers = [
|
|
|
22
22
|
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
23
23
|
]
|
|
24
24
|
dependencies = [
|
|
25
|
-
"pandas>=2.0.0",
|
|
26
25
|
"requests>=2.28.0",
|
|
27
26
|
"pytz",
|
|
28
27
|
"pip>=26.0.1",
|
|
@@ -35,9 +34,9 @@ dev = [
|
|
|
35
34
|
]
|
|
36
35
|
|
|
37
36
|
[project.urls]
|
|
38
|
-
Homepage = "https://github.com/
|
|
39
|
-
Repository = "https://github.com/
|
|
40
|
-
Issues = "https://github.com/
|
|
37
|
+
Homepage = "https://github.com/DOCUTEE/fb_crawl_request"
|
|
38
|
+
Repository = "https://github.com/DOCUTEE/fb_crawl_request"
|
|
39
|
+
Issues = "https://github.com/DOCUTEE/fb_crawl_request/issues"
|
|
41
40
|
|
|
42
41
|
[build-system]
|
|
43
42
|
requires = ["setuptools>=45", "wheel"]
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: fb_scraper_request
|
|
3
|
-
Version: 0.2.1
|
|
4
|
-
Summary: Facebook GraphQL Scraper - No login required, simple API to scrape public Facebook posts
|
|
5
|
-
Home-page: https://github.com/FaustRen/FB_graphql_scraper
|
|
6
|
-
Author: FaustRen
|
|
7
|
-
Author-email: Nguyen Minh Quang <quangforwork1203@gmail.com>
|
|
8
|
-
License: MIT
|
|
9
|
-
Project-URL: Homepage, https://github.com/quangnguyen/facebook-graphql-scraper
|
|
10
|
-
Project-URL: Repository, https://github.com/quangnguyen/facebook-graphql-scraper
|
|
11
|
-
Project-URL: Issues, https://github.com/quangnguyen/facebook-graphql-scraper/issues
|
|
12
|
-
Keywords: facebook,scraper,graphql,social-media,crawler
|
|
13
|
-
Classifier: Development Status :: 4 - Beta
|
|
14
|
-
Classifier: Intended Audience :: Developers
|
|
15
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
-
Classifier: Programming Language :: Python :: 3
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
-
Classifier: Topic :: Internet :: WWW/HTTP
|
|
22
|
-
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
-
Requires-Python: >=3.11
|
|
24
|
-
Description-Content-Type: text/markdown
|
|
25
|
-
License-File: LICENSE
|
|
26
|
-
Requires-Dist: pandas>=2.0.0
|
|
27
|
-
Requires-Dist: requests>=2.28.0
|
|
28
|
-
Requires-Dist: pytz
|
|
29
|
-
Requires-Dist: pip>=26.0.1
|
|
30
|
-
Provides-Extra: dev
|
|
31
|
-
Requires-Dist: build>=0.8.0; extra == "dev"
|
|
32
|
-
Requires-Dist: twine>=4.0.0; extra == "dev"
|
|
33
|
-
Dynamic: author
|
|
34
|
-
Dynamic: home-page
|
|
35
|
-
Dynamic: license-file
|
|
36
|
-
Dynamic: requires-python
|
|
37
|
-
|
|
38
|
-
# fb_scraper_request
|
|
39
|
-
|
|
40
|
-
Scrape public Facebook posts without login. Simple requests-based scraper.
|
|
41
|
-
|
|
42
|
-
## Example
|
|
43
|
-
|
|
44
|
-
```python
|
|
45
|
-
from fb_scraper_request import FacebookGraphqlScraper
|
|
46
|
-
|
|
47
|
-
fb = FacebookGraphqlScraper()
|
|
48
|
-
result = fb.get_user_posts("honghotduongpho.official00", days_limit=3)
|
|
49
|
-
|
|
50
|
-
for post in result["data"]:
|
|
51
|
-
print(post["context"])
|
|
52
|
-
print(f"Likes: {post['reaction_count.count']}")
|
|
53
|
-
```
|
|
54
|
-
|
|
55
|
-
Install: `pip install fb_scraper_request`
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
# fb_scraper_request
|
|
2
|
-
|
|
3
|
-
Scrape public Facebook posts without login. Simple requests-based scraper.
|
|
4
|
-
|
|
5
|
-
## Example
|
|
6
|
-
|
|
7
|
-
```python
|
|
8
|
-
from fb_scraper_request import FacebookGraphqlScraper
|
|
9
|
-
|
|
10
|
-
fb = FacebookGraphqlScraper()
|
|
11
|
-
result = fb.get_user_posts("honghotduongpho.official00", days_limit=3)
|
|
12
|
-
|
|
13
|
-
for post in result["data"]:
|
|
14
|
-
print(post["context"])
|
|
15
|
-
print(f"Likes: {post['reaction_count.count']}")
|
|
16
|
-
```
|
|
17
|
-
|
|
18
|
-
Install: `pip install fb_scraper_request`
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
from fb_graphql_scraper.facebook_graphql_scraper import FacebookGraphqlScraper as fb_graphql_scraper
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
## Example.1 - without logging in
|
|
6
|
-
if __name__ == "__main__":
|
|
7
|
-
facebook_user_name = "love.yuweishao"
|
|
8
|
-
facebook_user_id = "100044253168423"
|
|
9
|
-
days_limit = 100 # Number of days within which to scrape posts
|
|
10
|
-
driver_path = "/Users/hongshangren/Downloads/chromedriver-mac-arm64_136/chromedriver"
|
|
11
|
-
fb_spider = fb_graphql_scraper(driver_path=driver_path, open_browser=False)
|
|
12
|
-
res = fb_spider.get_user_posts(fb_username_or_userid=facebook_user_id, days_limit=days_limit,display_progress=True)
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
## Example.2 - login in your facebook account to collect data
|
|
16
|
-
# if __name__ == "__main__":
|
|
17
|
-
# facebook_user_name = "love.yuweishao"
|
|
18
|
-
# facebook_user_id = "100044253168423"
|
|
19
|
-
# fb_account = "facebook_account"
|
|
20
|
-
# fb_pwd = "facebook_paswword"
|
|
21
|
-
# days_limit = 30 # Number of days within which to scrape posts
|
|
22
|
-
# driver_path = "/Users/hongshangren/Downloads/chromedriver-mac-arm64_136/chromedriver"
|
|
23
|
-
# fb_spider = fb_graphql_scraper(fb_account=fb_account,fb_pwd=fb_pwd, driver_path=driver_path, open_browser=False)
|
|
24
|
-
# res = fb_spider.get_user_posts(fb_username_or_userid=facebook_user_name, days_limit=days_limit,display_progress=True)
|
|
25
|
-
# print(res)
|
|
26
|
-
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: fb_scraper_request
|
|
3
|
-
Version: 0.2.1
|
|
4
|
-
Summary: Facebook GraphQL Scraper - No login required, simple API to scrape public Facebook posts
|
|
5
|
-
Home-page: https://github.com/FaustRen/FB_graphql_scraper
|
|
6
|
-
Author: FaustRen
|
|
7
|
-
Author-email: Nguyen Minh Quang <quangforwork1203@gmail.com>
|
|
8
|
-
License: MIT
|
|
9
|
-
Project-URL: Homepage, https://github.com/quangnguyen/facebook-graphql-scraper
|
|
10
|
-
Project-URL: Repository, https://github.com/quangnguyen/facebook-graphql-scraper
|
|
11
|
-
Project-URL: Issues, https://github.com/quangnguyen/facebook-graphql-scraper/issues
|
|
12
|
-
Keywords: facebook,scraper,graphql,social-media,crawler
|
|
13
|
-
Classifier: Development Status :: 4 - Beta
|
|
14
|
-
Classifier: Intended Audience :: Developers
|
|
15
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
-
Classifier: Programming Language :: Python :: 3
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
-
Classifier: Topic :: Internet :: WWW/HTTP
|
|
22
|
-
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
-
Requires-Python: >=3.11
|
|
24
|
-
Description-Content-Type: text/markdown
|
|
25
|
-
License-File: LICENSE
|
|
26
|
-
Requires-Dist: pandas>=2.0.0
|
|
27
|
-
Requires-Dist: requests>=2.28.0
|
|
28
|
-
Requires-Dist: pytz
|
|
29
|
-
Requires-Dist: pip>=26.0.1
|
|
30
|
-
Provides-Extra: dev
|
|
31
|
-
Requires-Dist: build>=0.8.0; extra == "dev"
|
|
32
|
-
Requires-Dist: twine>=4.0.0; extra == "dev"
|
|
33
|
-
Dynamic: author
|
|
34
|
-
Dynamic: home-page
|
|
35
|
-
Dynamic: license-file
|
|
36
|
-
Dynamic: requires-python
|
|
37
|
-
|
|
38
|
-
# fb_scraper_request
|
|
39
|
-
|
|
40
|
-
Scrape public Facebook posts without login. Simple requests-based scraper.
|
|
41
|
-
|
|
42
|
-
## Example
|
|
43
|
-
|
|
44
|
-
```python
|
|
45
|
-
from fb_scraper_request import FacebookGraphqlScraper
|
|
46
|
-
|
|
47
|
-
fb = FacebookGraphqlScraper()
|
|
48
|
-
result = fb.get_user_posts("honghotduongpho.official00", days_limit=3)
|
|
49
|
-
|
|
50
|
-
for post in result["data"]:
|
|
51
|
-
print(post["context"])
|
|
52
|
-
print(f"Likes: {post['reaction_count.count']}")
|
|
53
|
-
```
|
|
54
|
-
|
|
55
|
-
Install: `pip install fb_scraper_request`
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
import setuptools
|
|
3
|
-
from setuptools import setup, find_packages
|
|
4
|
-
|
|
5
|
-
setup(
|
|
6
|
-
name='facebook-graphql-scraper',
|
|
7
|
-
version='1.1.4',
|
|
8
|
-
packages=[
|
|
9
|
-
"fb_graphql_scraper",
|
|
10
|
-
"fb_graphql_scraper.pages",
|
|
11
|
-
"fb_graphql_scraper.base",
|
|
12
|
-
"fb_graphql_scraper.tests",
|
|
13
|
-
"fb_graphql_scraper.utils",
|
|
14
|
-
],
|
|
15
|
-
license='MIT',
|
|
16
|
-
description='Implement Facebook scraper for post data retrieval',
|
|
17
|
-
long_description=open('README.md').read(),
|
|
18
|
-
long_description_content_type='text/markdown',
|
|
19
|
-
author='FaustRen',
|
|
20
|
-
author_email='faustren1z@gmail.com',
|
|
21
|
-
url='https://github.com/FaustRen/FB_graphql_scraper',
|
|
22
|
-
classifiers=[
|
|
23
|
-
"Programming Language :: Python :: 3.11",
|
|
24
|
-
"License :: OSI Approved :: MIT License",
|
|
25
|
-
"Operating System :: OS Independent",
|
|
26
|
-
],
|
|
27
|
-
python_requires='>=3.11',
|
|
28
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{fb_scraper_request-0.2.1 → fb_scraper_request-0.2.3}/fb_scraper_request.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|