anysite-cli 0.1.0 (anysite_cli-0.1.0-py3-none-any.whl)
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of anysite-cli might be problematic.
- anysite/__init__.py +4 -0
- anysite/__main__.py +6 -0
- anysite/api/__init__.py +21 -0
- anysite/api/client.py +271 -0
- anysite/api/errors.py +137 -0
- anysite/api/schemas.py +333 -0
- anysite/batch/__init__.py +1 -0
- anysite/batch/executor.py +176 -0
- anysite/batch/input.py +160 -0
- anysite/batch/rate_limiter.py +98 -0
- anysite/cli/__init__.py +1 -0
- anysite/cli/config.py +176 -0
- anysite/cli/executor.py +388 -0
- anysite/cli/options.py +249 -0
- anysite/config/__init__.py +11 -0
- anysite/config/paths.py +46 -0
- anysite/config/settings.py +187 -0
- anysite/dataset/__init__.py +37 -0
- anysite/dataset/analyzer.py +268 -0
- anysite/dataset/cli.py +644 -0
- anysite/dataset/collector.py +686 -0
- anysite/dataset/db_loader.py +248 -0
- anysite/dataset/errors.py +30 -0
- anysite/dataset/exporters.py +121 -0
- anysite/dataset/history.py +153 -0
- anysite/dataset/models.py +245 -0
- anysite/dataset/notifications.py +87 -0
- anysite/dataset/scheduler.py +107 -0
- anysite/dataset/storage.py +171 -0
- anysite/dataset/transformer.py +213 -0
- anysite/db/__init__.py +38 -0
- anysite/db/adapters/__init__.py +1 -0
- anysite/db/adapters/base.py +158 -0
- anysite/db/adapters/postgres.py +201 -0
- anysite/db/adapters/sqlite.py +183 -0
- anysite/db/cli.py +687 -0
- anysite/db/config.py +92 -0
- anysite/db/manager.py +166 -0
- anysite/db/operations/__init__.py +1 -0
- anysite/db/operations/insert.py +199 -0
- anysite/db/operations/query.py +43 -0
- anysite/db/schema/__init__.py +1 -0
- anysite/db/schema/inference.py +213 -0
- anysite/db/schema/types.py +71 -0
- anysite/db/utils/__init__.py +1 -0
- anysite/db/utils/sanitize.py +99 -0
- anysite/main.py +498 -0
- anysite/models/__init__.py +1 -0
- anysite/output/__init__.py +11 -0
- anysite/output/console.py +45 -0
- anysite/output/formatters.py +301 -0
- anysite/output/templates.py +76 -0
- anysite/py.typed +0 -0
- anysite/streaming/__init__.py +1 -0
- anysite/streaming/progress.py +121 -0
- anysite/streaming/writer.py +130 -0
- anysite/utils/__init__.py +1 -0
- anysite/utils/fields.py +242 -0
- anysite/utils/retry.py +109 -0
- anysite_cli-0.1.0.dist-info/METADATA +437 -0
- anysite_cli-0.1.0.dist-info/RECORD +64 -0
- anysite_cli-0.1.0.dist-info/WHEEL +4 -0
- anysite_cli-0.1.0.dist-info/entry_points.txt +2 -0
- anysite_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
anysite/utils/fields.py
ADDED
@@ -0,0 +1,242 @@
"""Enhanced field selection with nested paths, wildcards, and presets."""

import re
from dataclasses import dataclass
from typing import Any, Literal


@dataclass
class FieldPath:
    """A single segment of a parsed field path."""

    type: Literal["key", "index", "wildcard"]
    value: str | int | None = None


def parse_field_path(path: str) -> list[FieldPath]:
    """Parse a field path string into segments.

    Supports:
    - Simple keys: "name"
    - Dot notation: "experience.company"
    - Array indexing: "experience[0].company"
    - Wildcards: "experience[*].company"

    Args:
        path: Field path string

    Returns:
        List of FieldPath segments
    """
    segments: list[FieldPath] = []
    # Split on dots, but handle brackets
    parts = re.split(r"\.(?![^\[]*\])", path)

    for part in parts:
        # Check for array index or wildcard: field[0] or field[*]
        match = re.match(r"^(\w+)\[(\*|\d+)\]$", part)
        if match:
            key, index = match.groups()
            segments.append(FieldPath(type="key", value=key))
            if index == "*":
                segments.append(FieldPath(type="wildcard"))
            else:
                segments.append(FieldPath(type="index", value=int(index)))
        else:
            segments.append(FieldPath(type="key", value=part))

    return segments


def extract_field(data: Any, segments: list[FieldPath]) -> Any:
    """Extract a value from data following field path segments.

    Args:
        data: Source data (dict, list, or scalar)
        segments: Parsed field path segments

    Returns:
        Extracted value, or None if path doesn't exist
    """
    current = data

    for i, segment in enumerate(segments):
        if current is None:
            return None

        if segment.type == "key":
            if isinstance(current, dict):
                current = current.get(segment.value)  # type: ignore[arg-type]
            else:
                return None

        elif segment.type == "index":
            if isinstance(current, list) and isinstance(segment.value, int):
                if segment.value < len(current):
                    current = current[segment.value]
                else:
                    return None
            else:
                return None

        elif segment.type == "wildcard":
            if isinstance(current, list):
                remaining = segments[i + 1:]
                if remaining:
                    return [extract_field(item, remaining) for item in current]
                return current
            else:
                return None

    return current


def filter_fields(data: dict[str, Any], field_paths: list[str]) -> dict[str, Any]:
    """Filter a dictionary to include only specified fields.

    Supports nested field paths with dot notation, array indexing,
    and wildcards.

    Args:
        data: Source dictionary
        field_paths: List of field path strings

    Returns:
        Filtered dictionary
    """
    if not field_paths:
        return data

    result: dict[str, Any] = {}

    for path in field_paths:
        segments = parse_field_path(path)

        if len(segments) == 1 and segments[0].type == "key":
            # Simple key access
            key = segments[0].value
            if isinstance(key, str) and key in data:
                result[key] = data[key]
        else:
            # Complex path - extract value
            value = extract_field(data, segments)
            if value is not None:
                # Store using the top-level key or flattened path
                top_key = segments[0].value
                if isinstance(top_key, str):
                    if len(segments) == 1:
                        result[top_key] = value
                    else:
                        # For nested paths, reconstruct nested structure
                        _set_nested(result, segments, value)

    return result


def _set_nested(target: dict[str, Any], segments: list[FieldPath], value: Any) -> None:
    """Set a value in a nested dict structure following field path segments."""
    current = target

    for i, segment in enumerate(segments[:-1]):
        if segment.type == "key":
            key = segment.value
            if isinstance(key, str):
                if key not in current:
                    # Look ahead to determine container type
                    next_seg = segments[i + 1]
                    if next_seg.type == "index" or next_seg.type == "wildcard":
                        current[key] = []
                    else:
                        current[key] = {}
                current = current[key]
        elif segment.type in ("index", "wildcard"):
            # For index/wildcard at intermediate level, just store value at parent
            break

    last = segments[-1]
    if last.type == "key" and isinstance(last.value, str):
        if isinstance(current, dict):
            current[last.value] = value
    elif isinstance(current, dict):
        # Store with the dotted path as key for complex paths
        path_str = ".".join(
            str(s.value) if s.type != "wildcard" else "*"
            for s in segments
        )
        current[path_str] = value


def exclude_fields(data: dict[str, Any], field_names: list[str]) -> dict[str, Any]:
    """Remove specified fields from a dictionary.

    Supports top-level and nested dot-notation keys.

    Args:
        data: Source dictionary
        field_names: List of field names to remove

    Returns:
        Dictionary with specified fields removed
    """
    import copy

    result = copy.deepcopy(data)

    for field_name in field_names:
        if "." in field_name:
            # Nested field removal
            parts = field_name.split(".")
            current = result
            for part in parts[:-1]:
                if isinstance(current, dict) and part in current:
                    current = current[part]
                else:
                    break
            else:
                if isinstance(current, dict) and parts[-1] in current:
                    del current[parts[-1]]
        else:
            result.pop(field_name, None)

    return result


# Built-in field presets
BUILT_IN_PRESETS: dict[str, list[str]] = {
    "minimal": ["name", "full_name", "headline", "url", "linkedin_url"],
    "contact": ["name", "full_name", "email", "phone", "linkedin_url", "twitter_url"],
    "recruiting": [
        "name", "full_name", "headline", "company", "current_company",
        "experience", "skills", "location", "linkedin_url",
    ],
}


def resolve_fields_preset(preset_name: str) -> list[str] | None:
    """Resolve a field preset name to a list of fields.

    Checks built-in presets first, then user config.

    Args:
        preset_name: Name of the preset

    Returns:
        List of field names, or None if preset not found
    """
    # Check built-in presets
    if preset_name in BUILT_IN_PRESETS:
        return BUILT_IN_PRESETS[preset_name]

    # Check user config
    try:
        from anysite.config.settings import get_config_value

        custom = get_config_value(f"presets.{preset_name}")
        if isinstance(custom, str):
            return [f.strip() for f in custom.split(",")]
        if isinstance(custom, list):
            return custom
    except Exception:
        pass

    return None
anysite/utils/retry.py
ADDED
@@ -0,0 +1,109 @@
"""Retry logic with exponential backoff."""

import asyncio
import random
from collections.abc import Awaitable, Callable
from dataclasses import dataclass, field
from typing import Any

from anysite.api.errors import (
    NetworkError,
    RateLimitError,
    ServerError,
)
from anysite.api.errors import (
    TimeoutError as AnysiteTimeoutError,
)


@dataclass
class RetryConfig:
    """Configuration for retry behavior."""

    max_attempts: int = 3
    initial_delay: float = 1.0
    max_delay: float = 60.0
    exponential_base: float = 2.0
    jitter: bool = True
    retry_on: tuple[type[Exception], ...] = field(
        default_factory=lambda: (
            RateLimitError,
            ServerError,
            NetworkError,
            AnysiteTimeoutError,
        )
    )


def calculate_delay(attempt: int, config: RetryConfig) -> float:
    """Calculate delay for retry attempt using exponential backoff.

    Args:
        attempt: Zero-based attempt number
        config: Retry configuration

    Returns:
        Delay in seconds
    """
    delay = config.initial_delay * (config.exponential_base ** attempt)
    delay = min(delay, config.max_delay)

    if config.jitter:
        delay = delay * (0.5 + random.random() * 0.5)

    return delay


def should_retry(exception: Exception, config: RetryConfig) -> bool:
    """Determine if an exception should trigger a retry.

    Args:
        exception: The exception to check
        config: Retry configuration

    Returns:
        True if the exception is retryable
    """
    return isinstance(exception, config.retry_on)


async def retry_async(
    func: Callable[..., Awaitable[Any]],
    config: RetryConfig | None = None,
    *args: Any,
    **kwargs: Any,
) -> Any:
    """Execute an async function with retry logic.

    Args:
        func: Async function to execute
        config: Retry configuration (uses defaults if None)
        *args: Positional arguments for func
        **kwargs: Keyword arguments for func

    Returns:
        Result of the function

    Raises:
        The last exception if all retries are exhausted
    """
    if config is None:
        config = RetryConfig()

    last_error: Exception | None = None

    for attempt in range(config.max_attempts):
        try:
            return await func(*args, **kwargs)
        except Exception as e:
            last_error = e
            if not should_retry(e, config):
                raise

            if attempt < config.max_attempts - 1:
                delay = calculate_delay(attempt, config)
                await asyncio.sleep(delay)
            else:
                raise

    raise last_error  # type: ignore[misc]
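A minimal usage sketch for retry_async, assuming the package imports cleanly. FlakyError and flaky are hypothetical stand-ins for a transiently failing call, and a custom retry_on tuple is passed so the example does not depend on the anysite.api.errors exception constructors.

import asyncio

from anysite.utils.retry import RetryConfig, retry_async


class FlakyError(Exception):
    """Hypothetical transient error used only for this example."""


calls = {"count": 0}


async def flaky() -> str:
    # Fails twice, then succeeds on the third attempt.
    calls["count"] += 1
    if calls["count"] < 3:
        raise FlakyError("transient failure")
    return "ok"


config = RetryConfig(max_attempts=5, initial_delay=0.1, jitter=False, retry_on=(FlakyError,))
result = asyncio.run(retry_async(flaky, config))
# Sleeps 0.1 s, then 0.2 s (initial_delay * exponential_base ** attempt), then returns "ok".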