pypproxy 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. pypproxy/__init__.py +0 -0
  2. pypproxy/api/__init__.py +0 -0
  3. pypproxy/api/server.py +427 -0
  4. pypproxy/bulk/__init__.py +0 -0
  5. pypproxy/bulk/sender.py +97 -0
  6. pypproxy/cert/__init__.py +0 -0
  7. pypproxy/cert/ca.py +144 -0
  8. pypproxy/cert/client_cert.py +65 -0
  9. pypproxy/codec.py +176 -0
  10. pypproxy/config/__init__.py +0 -0
  11. pypproxy/config/config.py +106 -0
  12. pypproxy/dns/__init__.py +0 -0
  13. pypproxy/dns/server.py +149 -0
  14. pypproxy/exporter/__init__.py +0 -0
  15. pypproxy/exporter/exporter.py +122 -0
  16. pypproxy/exporter/importer.py +169 -0
  17. pypproxy/graphql/__init__.py +0 -0
  18. pypproxy/graphql/detector.py +76 -0
  19. pypproxy/graphql/introspection.py +217 -0
  20. pypproxy/graphql/modifier.py +98 -0
  21. pypproxy/graphql/schema_store.py +33 -0
  22. pypproxy/intercept/__init__.py +0 -0
  23. pypproxy/intercept/manager.py +142 -0
  24. pypproxy/interceptor/__init__.py +0 -0
  25. pypproxy/interceptor/interceptor.py +172 -0
  26. pypproxy/proto/__init__.py +0 -0
  27. pypproxy/proto/grpc.py +48 -0
  28. pypproxy/proto/mqtt.py +119 -0
  29. pypproxy/proto/ws.py +120 -0
  30. pypproxy/proto/ws_intercept.py +117 -0
  31. pypproxy/proxy/__init__.py +0 -0
  32. pypproxy/proxy/proxy.py +407 -0
  33. pypproxy/replay/__init__.py +0 -0
  34. pypproxy/replay/replay.py +77 -0
  35. pypproxy/rule/__init__.py +0 -0
  36. pypproxy/rule/rule.py +198 -0
  37. pypproxy/scan/__init__.py +0 -0
  38. pypproxy/scan/scanner.py +296 -0
  39. pypproxy/script/__init__.py +0 -0
  40. pypproxy/script/engine.py +49 -0
  41. pypproxy/security/__init__.py +0 -0
  42. pypproxy/security/header_checker.py +308 -0
  43. pypproxy/security/int_overflow.py +193 -0
  44. pypproxy/security/jwt_checker.py +273 -0
  45. pypproxy/security/plugin.py +152 -0
  46. pypproxy/security/randomness.py +165 -0
  47. pypproxy/store/__init__.py +0 -0
  48. pypproxy/store/db.py +189 -0
  49. pypproxy/store/filter_parser.py +181 -0
  50. pypproxy/store/fts.py +105 -0
  51. pypproxy/store/models.py +81 -0
  52. pypproxy/store/scope.py +63 -0
  53. pypproxy/store/store.py +120 -0
  54. pypproxy/ui/__init__.py +0 -0
  55. pypproxy/ui/app.py +386 -0
  56. pypproxy/ui/bulk_sender_ui.py +125 -0
  57. pypproxy/ui/cui.py +162 -0
  58. pypproxy/ui/detail.py +179 -0
  59. pypproxy/ui/diff_view.py +118 -0
  60. pypproxy/ui/graphql_tab.py +265 -0
  61. pypproxy/ui/import_tab.py +136 -0
  62. pypproxy/ui/intercept_dialog.py +74 -0
  63. pypproxy/ui/resender.py +140 -0
  64. pypproxy/ui/scan_tab.py +98 -0
  65. pypproxy/ui/security_tab.py +356 -0
  66. pypproxy/ui/settings.py +413 -0
  67. pypproxy/ui/theme.py +59 -0
  68. pypproxy-0.1.0.dist-info/METADATA +19 -0
  69. pypproxy-0.1.0.dist-info/RECORD +72 -0
  70. pypproxy-0.1.0.dist-info/WHEEL +4 -0
  71. pypproxy-0.1.0.dist-info/entry_points.txt +2 -0
  72. pypproxy-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,169 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import json
5
+ import logging
6
+ from datetime import UTC, datetime
7
+ from pathlib import Path
8
+
9
+ from pypproxy.store.models import Entry
10
+ from pypproxy.store.store import Store
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
def import_har(data: str | bytes, store: Store) -> int:
    """Load every entry of a HAR document into ``store``.

    ``data`` may be text or raw bytes; bytes are decoded as UTF-8 with
    undecodable sequences replaced. The entry list is read from
    ``log.entries`` or, when the document has no ``log`` key, from a
    top-level ``entries`` key.

    Entries that fail to convert or to store are logged at debug level and
    skipped, so a single malformed entry does not abort the import.

    Returns:
        The number of entries that were added to the store.
    """
    text = data.decode("utf-8", errors="replace") if isinstance(data, bytes) else data
    document = json.loads(text)
    log_section = document.get("log", document)

    imported = 0
    for raw_entry in log_section.get("entries", []):
        try:
            store.add(_har_entry_to_entry(raw_entry))
        except Exception as e:
            logger.debug("Skipping HAR entry: %s", e)
        else:
            # Only count entries that were both converted and stored
            imported += 1
    return imported
33
+
34
+
35
def import_json(data: str | bytes, store: Store) -> int:
    """Load entries from a paxy-format JSON export into ``store``.

    ``data`` may be text or raw bytes; bytes are decoded as UTF-8 with
    undecodable sequences replaced. The document is either an object with
    an ``entries`` list or a bare list of entry objects.

    Entries that fail to convert or to store are logged at debug level and
    skipped.

    Returns:
        The number of entries that were added to the store.
    """
    text = data.decode("utf-8", errors="replace") if isinstance(data, bytes) else data
    document = json.loads(text)
    if isinstance(document, dict):
        records = document.get("entries", [])
    else:
        records = document

    imported = 0
    for record in records:
        try:
            store.add(_json_to_entry(record))
        except Exception as e:
            logger.debug("Skipping entry: %s", e)
        else:
            # Only count entries that were both converted and stored
            imported += 1
    return imported
53
+
54
+
55
def import_file(path: str, store: Store) -> int:
    """Import a capture file, choosing the importer from its name or content.

    A ``.har`` suffix (case-insensitive) always selects the HAR importer.
    Any other file is parsed as JSON: if it has a ``log`` object containing
    ``entries`` it is treated as HAR, otherwise as a paxy JSON export.

    Returns:
        The number of entries imported.

    Raises:
        ValueError: if a non-``.har`` file cannot be parsed or imported;
            the original exception is chained as the cause.
    """
    source = Path(path)
    payload = source.read_bytes()
    if source.suffix.lower() == ".har":
        return import_har(payload, store)

    # Unknown extension: sniff the JSON structure, HAR first
    try:
        document = json.loads(payload)
        looks_like_har = "log" in document and "entries" in document.get("log", {})
        importer = import_har if looks_like_har else import_json
        return importer(payload, store)
    except Exception as e:
        raise ValueError(f"Cannot parse {path}: {e}") from e
69
+
70
+
71
def _har_entry_to_entry(item: dict) -> Entry:
    """Convert one element of a HAR ``entries`` list into an ``Entry``.

    Header names are lower-cased and repeated headers are collected into a
    list per name. The response body is base64-decoded when the HAR content
    block declares ``encoding: base64``; otherwise it is UTF-8 encoded.
    A missing or unparseable ``startedDateTime`` falls back to the current
    UTC time.

    JSON ``null`` values for optional sections (``request``, ``response``,
    ``headers``, ``postData``, ``content``, ``time``) are treated the same
    as an absent key instead of raising.

    Raises:
        KeyError: if a header object lacks ``name`` or ``value``.
        binascii.Error: if a base64 response body is malformed.
    """
    req = item.get("request") or {}
    resp = item.get("response") or {}

    # Split the URL into scheme / host / path / query.
    url = req.get("url") or ""
    scheme, separator, rest = url.partition("://")
    if not separator:
        # No "://" at all: the text is authority + path, not a scheme
        scheme, rest = "", url
    # Cut the query off first so "host?x=1" (no path) keeps a clean host
    before_query, _, query = rest.partition("?")
    host, _, path_rest = before_query.partition("/")
    path = "/" + path_rest
    # Resolve the default once so scheme and protocol cannot disagree
    scheme = scheme.lower() or "https"

    req_headers: dict[str, list[str]] = {}
    for header in req.get("headers") or []:
        req_headers.setdefault(header["name"].lower(), []).append(header["value"])

    post_data = req.get("postData") or {}
    req_body = (post_data.get("text") or "").encode()

    resp_headers: dict[str, list[str]] = {}
    for header in resp.get("headers") or []:
        resp_headers.setdefault(header["name"].lower(), []).append(header["value"])

    resp_body = b""
    content = resp.get("content") or {}
    if text := content.get("text") or "":
        if content.get("encoding") == "base64":
            resp_body = base64.b64decode(text)
        else:
            resp_body = text.encode("utf-8", errors="replace")

    started = item.get("startedDateTime", "")
    try:
        created_at = datetime.fromisoformat(started.replace("Z", "+00:00"))
    except (AttributeError, TypeError, ValueError):
        # Missing, non-string or malformed timestamp
        created_at = datetime.now(UTC)

    return Entry(
        method=req.get("method", "GET"),
        scheme=scheme,
        host=host,
        path=path,
        query=query,
        req_headers=req_headers,
        req_body=req_body,
        status_code=resp.get("status", 0),
        resp_headers=resp_headers,
        resp_body=resp_body,
        # HAR "time" may be fractional milliseconds or null
        duration_ms=int(item.get("time") or 0),
        protocol="https" if scheme == "https" else "http",
        created_at=created_at,
    )
129
+
130
+
131
def _json_to_entry(item: dict) -> Entry:
    """Build an ``Entry`` from one object of a paxy JSON export.

    ``req_body`` and ``resp_body`` are base64-decoded; if decoding raises,
    the raw text is encoded to bytes instead. ``created_at`` is parsed as an
    ISO-8601 string and defaults to the current UTC time when absent or
    unparseable. Every other field is copied from ``item`` with a default.
    """

    def decode_body(key: str) -> bytes:
        # Empty or missing bodies stay empty; otherwise prefer base64
        encoded = item.get(key, "")
        if not encoded:
            return b""
        try:
            return base64.b64decode(encoded)
        except Exception:
            return encoded.encode()

    request_body = decode_body("req_body")
    response_body = decode_body("resp_body")

    timestamp = datetime.now(UTC)
    raw_timestamp = item.get("created_at", "")
    if raw_timestamp:
        try:
            timestamp = datetime.fromisoformat(raw_timestamp)
        except Exception:
            # Keep the "now" default when the stored value cannot be parsed
            pass

    return Entry(
        method=item.get("method", "GET"),
        scheme=item.get("scheme", "https"),
        host=item.get("host", ""),
        path=item.get("path", "/"),
        query=item.get("query", ""),
        req_headers=item.get("req_headers", {}),
        req_body=request_body,
        status_code=item.get("status_code", 0),
        resp_headers=item.get("resp_headers", {}),
        resp_body=response_body,
        duration_ms=item.get("duration_ms", 0),
        protocol=item.get("protocol", "https"),
        tags=item.get("tags", []),
        modified=item.get("modified", False),
        created_at=timestamp,
    )
File without changes
@@ -0,0 +1,76 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+
6
+ from pypproxy.store.models import Entry
7
+
8
+
9
def is_graphql(entry: Entry) -> bool:
    """Return True if the entry looks like a GraphQL request.

    Heuristics, checked in order:

    1. the request ``content-type`` header mentions ``graphql``;
    2. a POST whose JSON object body has a string ``query`` starting with
       ``{``, ``query``, ``mutation``, ``subscription`` or ``fragment``;
    3. a GET whose query string has a parameter named exactly ``query``;
    4. the path contains ``/graphql`` (case-insensitive).

    An empty header list, a plain-string header value, a non-JSON body or a
    non-string ``query`` never raise; they simply do not match.
    """
    # Header values are normally list[str]; tolerate [] and bare strings
    raw_ct = entry.req_headers.get("content-type") or ""
    if not isinstance(raw_ct, str):
        raw_ct = raw_ct[0] if raw_ct else ""
    if "graphql" in str(raw_ct).lower():
        return True

    if entry.method == "POST" and entry.req_body:
        try:
            body = json.loads(entry.req_body)
        except (ValueError, TypeError, RecursionError):
            body = None
        if isinstance(body, dict):
            q = body.get("query")
            if isinstance(q, str) and q.strip().startswith(
                ("{", "query", "mutation", "subscription", "fragment")
            ):
                return True

    if entry.method == "GET" and entry.query:
        # Compare whole parameter names so "subquery=" is not a hit
        if any(pair.startswith("query=") for pair in entry.query.split("&")):
            return True

    return bool(re.search(r"/graphql", entry.path, re.IGNORECASE))
32
+
33
+
34
def parse_operation(body: bytes) -> dict:
    """Parse a GraphQL request body into its three standard parts.

    Returns a dict with keys ``query``, ``variables`` and ``operationName``.
    When ``body`` is a JSON object the values come from it; missing keys and
    JSON ``null`` values are normalised to ``""`` / ``{}`` so callers always
    get a string, a mapping and a string. Any other body (invalid JSON, or
    JSON that is not an object) is returned verbatim as the ``query`` text,
    decoded as UTF-8 with replacement.
    """
    try:
        data = json.loads(body)
    except (ValueError, TypeError, RecursionError):
        data = None

    if isinstance(data, dict):
        return {
            # Clients commonly send explicit nulls for the optional parts
            "query": data.get("query") or "",
            "variables": data.get("variables") or {},
            "operationName": data.get("operationName") or "",
        }
    return {"query": body.decode("utf-8", errors="replace"), "variables": {}, "operationName": ""}
47
+
48
+
49
def extract_operation_type(query: str) -> str:
    """Classify a GraphQL document by its leading keyword.

    The text is stripped and lower-cased, then matched by prefix. A document
    starting with ``{`` is the anonymous-query shorthand and counts as
    ``"query"``.

    Returns:
        ``"mutation"``, ``"subscription"``, ``"query"`` or ``"unknown"``.
    """
    normalized = query.strip().lower()
    prefix_to_kind = (
        ("mutation", "mutation"),
        ("subscription", "subscription"),
        ("query", "query"),
        ("{", "query"),
    )
    for prefix, kind in prefix_to_kind:
        if normalized.startswith(prefix):
            return kind
    return "unknown"
59
+
60
+
61
def extract_operation_name(query: str) -> str:
    """Return the name that follows the first operation keyword.

    Looks for ``query``, ``mutation`` or ``subscription`` followed by
    whitespace and an identifier; returns that identifier, or ``""`` when
    the document has no named operation.
    """
    match = re.search(r"(?:query|mutation|subscription)\s+(\w+)", query)
    return match.group(1) if match else ""
67
+
68
+
69
def extract_field_names(query: str) -> list[str]:
    """Heuristically collect field-like identifiers from a GraphQL query.

    String literals are blanked and ``#`` comments removed first. The scan
    then picks identifiers that directly follow ``{`` or ``,`` and are
    themselves followed by ``(``, ``{``, ``:`` or whitespace. Duplicates are
    dropped, keeping first-seen order.
    """
    without_strings = re.sub(r'"[^"]*"', '""', query)
    without_comments = re.sub(r"#[^\n]*", "", without_strings)

    ordered_unique: list[str] = []
    seen: set[str] = set()
    for name in re.findall(r"[{,]\s*(\w+)\s*[({:\s]", without_comments):
        if name not in seen:
            seen.add(name)
            ordered_unique.append(name)
    return ordered_unique
@@ -0,0 +1,217 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from dataclasses import dataclass, field
5
+
6
+ import httpx
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
# Full-schema introspection document posted by fetch_schema as the "query"
# of its request. It asks for the root operation type names, every type via
# the FullType fragment, and all directives. The TypeRef fragment follows
# seven nested ofType levels beneath the top-level reference.
# NOTE(review): line indentation inside this literal could not be recovered
# from the diff rendering this file was taken from — confirm against the
# packaged source.
INTROSPECTION_QUERY = """
query IntrospectionQuery {
__schema {
queryType { name }
mutationType { name }
subscriptionType { name }
types {
...FullType
}
directives {
name
description
locations
args { ...InputValue }
}
}
}

fragment FullType on __Type {
kind
name
description
fields(includeDeprecated: true) {
name
description
args { ...InputValue }
type { ...TypeRef }
isDeprecated
deprecationReason
}
inputFields { ...InputValue }
interfaces { ...TypeRef }
enumValues(includeDeprecated: true) {
name
description
isDeprecated
deprecationReason
}
possibleTypes { ...TypeRef }
}

fragment InputValue on __InputValue {
name
description
type { ...TypeRef }
defaultValue
}

fragment TypeRef on __Type {
kind
name
ofType {
kind
name
ofType {
kind
name
ofType {
kind
name
ofType { kind name ofType { kind name ofType { kind name ofType { kind name } } } }
}
}
}
}
"""
76
+
77
+
78
@dataclass
class GraphQLField:
    """One field of a GraphQL type, as read from an introspection result."""

    name: str
    description: str
    # Rendered type reference string (see _type_ref_to_str), e.g. "[User!]!"
    type_name: str
    # Names of the field's arguments; _parse_schema stores names only
    args: list[str] = field(default_factory=list)
    is_deprecated: bool = False
85
+
86
+
87
@dataclass
class GraphQLType:
    """One named type from an introspected schema, with its fields."""

    name: str
    # Introspection "kind" string as returned by the server
    kind: str
    description: str
    # Empty when the introspection data reports no fields for the type
    fields: list[GraphQLField] = field(default_factory=list)
93
+
94
+
95
@dataclass
class GraphQLSchema:
    """Parsed view of a GraphQL introspection result.

    Holds the root operation type names, the list of parsed types and the
    raw response the schema was built from.
    """

    query_type: str = ""
    mutation_type: str = ""
    subscription_type: str = ""
    types: list[GraphQLType] = field(default_factory=list)
    raw: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return a plain-dict form of the schema.

        Types whose name starts with ``__`` are omitted, and ``raw`` is not
        included.
        """
        serialized_types = []
        for gql_type in self.types:
            if gql_type.name.startswith("__"):
                # Built-in introspection types are left out
                continue
            serialized_fields = []
            for gql_field in gql_type.fields:
                serialized_fields.append(
                    {
                        "name": gql_field.name,
                        "description": gql_field.description,
                        "type": gql_field.type_name,
                        "args": gql_field.args,
                        "is_deprecated": gql_field.is_deprecated,
                    }
                )
            serialized_types.append(
                {
                    "name": gql_type.name,
                    "kind": gql_type.kind,
                    "description": gql_type.description,
                    "fields": serialized_fields,
                }
            )

        return {
            "query_type": self.query_type,
            "mutation_type": self.mutation_type,
            "subscription_type": self.subscription_type,
            "types": serialized_types,
        }

    def get_type(self, name: str) -> GraphQLType | None:
        """Return the first type called ``name``, or None if there is none."""
        return next((candidate for candidate in self.types if candidate.name == name), None)

    def root_fields(self) -> list[GraphQLField]:
        """Return fields of the Query root type (``"Query"`` if unnamed)."""
        root = self.get_type(self.query_type or "Query")
        if not root:
            return []
        return root.fields

    def mutation_fields(self) -> list[GraphQLField]:
        """Return fields of the Mutation root type (``"Mutation"`` if unnamed)."""
        root = self.get_type(self.mutation_type or "Mutation")
        if not root:
            return []
        return root.fields
143
+
144
+
145
async def fetch_schema(
    url: str,
    headers: dict[str, str] | None = None,
    timeout: int = 15,
) -> GraphQLSchema | None:
    """Send an introspection query to ``url`` and parse the returned schema.

    Args:
        url: GraphQL endpoint to POST to.
        headers: Extra request headers; merged over the default JSON
            content type.
        timeout: Timeout passed to the HTTP client.

    Returns:
        The parsed schema, or None when the request fails, the response is
        not a JSON object, the server reports errors without data, or the
        response carries no ``__schema``.
    """
    req_headers = {"content-type": "application/json"}
    if headers:
        req_headers.update(headers)

    try:
        # NOTE(review): TLS verification is disabled here — presumably so
        # targets behind an intercepting proxy can be reached; confirm.
        async with httpx.AsyncClient(verify=False, timeout=timeout, http2=True) as client:
            resp = await client.post(
                url,
                json={"query": INTROSPECTION_QUERY},
                headers=req_headers,
            )
            data = resp.json()
    except Exception as e:
        logger.warning("Introspection failed for %s: %s", url, e)
        return None

    if not isinstance(data, dict):
        # Valid JSON but not a GraphQL response envelope
        logger.warning("Introspection failed for %s: %s", url, "response is not a JSON object")
        return None

    payload = data.get("data")
    if "errors" in data and not payload:
        logger.warning("Introspection returned errors: %s", data["errors"])
        return None

    # "data" may be null or a non-object even without an "errors" key
    schema_data = payload.get("__schema") if isinstance(payload, dict) else None
    if not schema_data:
        return None

    return _parse_schema(schema_data, data)
176
+
177
+
178
def _parse_schema(schema_data: dict, raw: dict) -> GraphQLSchema:
    """Build a ``GraphQLSchema`` from the ``__schema`` part of a response.

    Args:
        schema_data: The ``__schema`` object of the introspection result.
        raw: The complete response, stored unchanged on the schema.

    JSON ``null`` is handled like a missing key for every list and string
    read here, so a sparse introspection result cannot leave ``None`` in a
    field typed ``str`` or break iteration.
    """
    schema = GraphQLSchema(
        query_type=(schema_data.get("queryType") or {}).get("name") or "",
        mutation_type=(schema_data.get("mutationType") or {}).get("name") or "",
        subscription_type=(schema_data.get("subscriptionType") or {}).get("name") or "",
        raw=raw,
    )

    for type_data in schema_data.get("types") or []:
        gql_type = GraphQLType(
            name=type_data.get("name") or "",
            kind=type_data.get("kind") or "",
            description=type_data.get("description") or "",
        )
        # "fields" is null for types that have none
        for field_data in type_data.get("fields") or []:
            gql_type.fields.append(
                GraphQLField(
                    name=field_data.get("name") or "",
                    description=field_data.get("description") or "",
                    type_name=_type_ref_to_str(field_data.get("type") or {}),
                    args=[a.get("name") or "" for a in field_data.get("args") or []],
                    is_deprecated=bool(field_data.get("isDeprecated", False)),
                )
            )
        schema.types.append(gql_type)

    return schema
204
+
205
+
206
def _type_ref_to_str(type_ref: dict) -> str:
    """Render an introspection type reference in GraphQL type notation.

    ``NON_NULL`` wrappers become a trailing ``!`` and ``LIST`` wrappers
    become surrounding brackets, e.g. ``[User!]!``. An empty or missing
    reference, or a named type without a name, renders as ``""``.
    """
    # Walk down through the wrapper chain, remembering each wrapper kind
    wrappers: list[str] = []
    node = type_ref
    while node:
        kind = node.get("kind", "")
        if kind not in ("NON_NULL", "LIST"):
            break
        wrappers.append(kind)
        node = node.get("ofType")

    rendered = (node.get("name", "") or "") if node else ""

    # Re-apply the wrappers from the innermost outwards
    for kind in reversed(wrappers):
        rendered = f"{rendered}!" if kind == "NON_NULL" else f"[{rendered}]"
    return rendered
@@ -0,0 +1,98 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ from typing import Any
6
+
7
+
8
def set_variable(body: bytes, var_name: str, new_value: Any) -> bytes:
    """Set ``variables[var_name]`` in a GraphQL JSON request body.

    The body is re-serialised only when it is a JSON object whose
    ``variables`` value is itself an object. In every other case, including
    parse or serialisation errors, ``body`` is returned unchanged.
    """
    try:
        payload = json.loads(body)
        variables = payload.get("variables") if isinstance(payload, dict) else None
        if not isinstance(variables, dict):
            return body
        variables[var_name] = new_value
        return json.dumps(payload).encode()
    except Exception:
        return body
18
+
19
+
20
def replace_field_alias(body: bytes, field: str, alias: str) -> bytes:
    """Prefix the first occurrence of ``field`` in the query with an alias.

    The first whole-word match of ``field`` in the body's ``query`` string
    becomes ``alias: field``. The body is returned unchanged when it has no
    ``query`` key or when anything fails along the way.
    """
    try:
        payload = json.loads(body)
        if "query" not in payload:
            return body
        pattern = rf"\b{re.escape(field)}\b"
        replacement = f"{alias}: {field}"
        payload["query"] = re.sub(pattern, replacement, payload["query"], count=1)
        return json.dumps(payload).encode()
    except Exception:
        return body
35
+
36
+
37
def inject_field(body: bytes, type_name: str, extra_fields: list[str]) -> bytes:
    """Insert extra fields at the start of a named selection set.

    Finds the first ``type_name`` followed by an opening brace in the
    body's ``query`` string and places the space-joined ``extra_fields``
    right after that brace. For example,
    ``inject_field(body, "User", ["__typename", "id"])`` turns ``User {``
    into ``User { __typename id``.

    The body is returned unchanged when it has no ``query`` key or when
    anything fails along the way.
    """
    try:
        payload = json.loads(body)
        if "query" not in payload:
            return body
        injected = " ".join(extra_fields)
        opening = rf"\b{re.escape(type_name)}\s*{{"
        payload["query"] = re.sub(
            opening,
            f"{type_name} {{ {injected} ",
            payload["query"],
            count=1,
        )
        return json.dumps(payload).encode()
    except Exception:
        return body
58
+
59
+
60
def strip_operation_name(body: bytes) -> bytes:
    """Drop ``operationName`` from a GraphQL JSON request body.

    Used to probe how a server handles anonymous operations. A JSON object
    body is re-serialised without the key; any other body, or one that
    fails to parse, is returned unchanged.
    """
    try:
        payload = json.loads(body)
        if not isinstance(payload, dict):
            return body
        payload.pop("operationName", None)
        return json.dumps(payload).encode()
    except Exception:
        return body
70
+
71
+
72
def add_introspection_field(body: bytes) -> bytes:
    """Add ``__typename`` after every opening brace of the query, as a probe.

    Each ``{`` in the body's ``query`` string, together with any whitespace
    after it, is rewritten to ``{ __typename ``. The body is returned
    unchanged when it has no ``query`` key or when anything fails.
    """
    try:
        payload = json.loads(body)
        if "query" not in payload:
            return body
        original_query = payload["query"]
        payload["query"] = re.sub(r"{\s*", "{ __typename ", original_query)
        return json.dumps(payload).encode()
    except Exception:
        return body
82
+
83
+
84
def build_query(fields: list[str], type_name: str = "") -> bytes:
    """Build a JSON request body for an anonymous query over ``fields``.

    The fields are placed, newline-separated, inside one pair of braces.
    ``type_name`` is accepted but not used by this function.
    """
    selection = "\n ".join(fields)
    payload = {"query": "{ " + selection + " }"}
    return json.dumps(payload).encode()
89
+
90
+
91
def _graphql_literal(value: Any) -> str:
    """Render a Python value as a GraphQL input literal."""
    if value is None:
        return "null"
    # bool must be tested before the generic fallback: str(True) is "True"
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, str):
        # JSON string escaping is also valid GraphQL string escaping
        return json.dumps(value, ensure_ascii=False)
    if isinstance(value, dict):
        inner = ", ".join(f"{k}: {_graphql_literal(v)}" for k, v in value.items())
        return f"{{{inner}}}"
    if isinstance(value, (list, tuple)):
        return f"[{', '.join(_graphql_literal(v) for v in value)}]"
    return str(value)


def build_mutation(mutation_name: str, args: dict, return_fields: list[str]) -> bytes:
    """Build a JSON request body for a single GraphQL mutation.

    Args:
        mutation_name: Name of the mutation field to call.
        args: Argument names mapped to Python values. Strings are quoted
            and escaped; ``True``/``False``/``None`` become ``true``/
            ``false``/``null``; lists and dicts become GraphQL list and
            object literals; anything else is rendered with ``str()``.
        return_fields: Fields to select from the mutation result.

    Returns:
        UTF-8 encoded JSON of the form ``{"query": "mutation { ... }"}``.
        The argument list is omitted when ``args`` is empty and the
        selection set is omitted when ``return_fields`` is empty, because
        empty ``()`` or ``{ }`` are not valid GraphQL.
    """
    arg_str = ", ".join(f"{k}: {_graphql_literal(v)}" for k, v in args.items())
    call = f"{mutation_name}({arg_str})" if arg_str else mutation_name
    return_str = " ".join(return_fields)
    selection = f" {{ {return_str} }}" if return_str else ""
    query = f"mutation {{ {call}{selection} }}"
    return json.dumps({"query": query}).encode()
@@ -0,0 +1,33 @@
1
+ from __future__ import annotations
2
+
3
+ import threading
4
+
5
+ from .introspection import GraphQLSchema
6
+
7
+
8
class SchemaStore:
    """Per-host cache of GraphQL schemas.

    Every read and write of the underlying mapping happens while holding a
    single lock.
    """

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._schemas: dict[str, GraphQLSchema] = {}

    def set(self, host: str, schema: GraphQLSchema) -> None:
        """Store ``schema`` for ``host``, replacing any previous one."""
        with self._lock:
            self._schemas[host] = schema

    def get(self, host: str) -> GraphQLSchema | None:
        """Return the schema cached for ``host``, or None."""
        with self._lock:
            cached = self._schemas.get(host)
        return cached

    def list_hosts(self) -> list[str]:
        """Return the hosts that currently have a cached schema."""
        with self._lock:
            hosts = [*self._schemas]
        return hosts

    def delete(self, host: str) -> None:
        """Forget the schema for ``host``; unknown hosts are ignored."""
        with self._lock:
            if host in self._schemas:
                del self._schemas[host]

    def all(self) -> dict[str, GraphQLSchema]:
        """Return a shallow copy of the whole host-to-schema mapping."""
        with self._lock:
            snapshot = self._schemas.copy()
        return snapshot
File without changes