anysite-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of anysite-cli might be problematic. Click here for more details.

Files changed (64) hide show
  1. anysite/__init__.py +4 -0
  2. anysite/__main__.py +6 -0
  3. anysite/api/__init__.py +21 -0
  4. anysite/api/client.py +271 -0
  5. anysite/api/errors.py +137 -0
  6. anysite/api/schemas.py +333 -0
  7. anysite/batch/__init__.py +1 -0
  8. anysite/batch/executor.py +176 -0
  9. anysite/batch/input.py +160 -0
  10. anysite/batch/rate_limiter.py +98 -0
  11. anysite/cli/__init__.py +1 -0
  12. anysite/cli/config.py +176 -0
  13. anysite/cli/executor.py +388 -0
  14. anysite/cli/options.py +249 -0
  15. anysite/config/__init__.py +11 -0
  16. anysite/config/paths.py +46 -0
  17. anysite/config/settings.py +187 -0
  18. anysite/dataset/__init__.py +37 -0
  19. anysite/dataset/analyzer.py +268 -0
  20. anysite/dataset/cli.py +644 -0
  21. anysite/dataset/collector.py +686 -0
  22. anysite/dataset/db_loader.py +248 -0
  23. anysite/dataset/errors.py +30 -0
  24. anysite/dataset/exporters.py +121 -0
  25. anysite/dataset/history.py +153 -0
  26. anysite/dataset/models.py +245 -0
  27. anysite/dataset/notifications.py +87 -0
  28. anysite/dataset/scheduler.py +107 -0
  29. anysite/dataset/storage.py +171 -0
  30. anysite/dataset/transformer.py +213 -0
  31. anysite/db/__init__.py +38 -0
  32. anysite/db/adapters/__init__.py +1 -0
  33. anysite/db/adapters/base.py +158 -0
  34. anysite/db/adapters/postgres.py +201 -0
  35. anysite/db/adapters/sqlite.py +183 -0
  36. anysite/db/cli.py +687 -0
  37. anysite/db/config.py +92 -0
  38. anysite/db/manager.py +166 -0
  39. anysite/db/operations/__init__.py +1 -0
  40. anysite/db/operations/insert.py +199 -0
  41. anysite/db/operations/query.py +43 -0
  42. anysite/db/schema/__init__.py +1 -0
  43. anysite/db/schema/inference.py +213 -0
  44. anysite/db/schema/types.py +71 -0
  45. anysite/db/utils/__init__.py +1 -0
  46. anysite/db/utils/sanitize.py +99 -0
  47. anysite/main.py +498 -0
  48. anysite/models/__init__.py +1 -0
  49. anysite/output/__init__.py +11 -0
  50. anysite/output/console.py +45 -0
  51. anysite/output/formatters.py +301 -0
  52. anysite/output/templates.py +76 -0
  53. anysite/py.typed +0 -0
  54. anysite/streaming/__init__.py +1 -0
  55. anysite/streaming/progress.py +121 -0
  56. anysite/streaming/writer.py +130 -0
  57. anysite/utils/__init__.py +1 -0
  58. anysite/utils/fields.py +242 -0
  59. anysite/utils/retry.py +109 -0
  60. anysite_cli-0.1.0.dist-info/METADATA +437 -0
  61. anysite_cli-0.1.0.dist-info/RECORD +64 -0
  62. anysite_cli-0.1.0.dist-info/WHEEL +4 -0
  63. anysite_cli-0.1.0.dist-info/entry_points.txt +2 -0
  64. anysite_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,242 @@
1
+ """Enhanced field selection with nested paths, wildcards, and presets."""
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+ from typing import Any, Literal
6
+
7
+
8
+ @dataclass
9
+ class FieldPath:
10
+ """A single segment of a parsed field path."""
11
+
12
+ type: Literal["key", "index", "wildcard"]
13
+ value: str | int | None = None
14
+
15
+
16
def parse_field_path(path: str) -> list[FieldPath]:
    """Break a field path string into ``FieldPath`` steps.

    Recognised forms:
        - plain key: ``"name"``
        - dotted keys: ``"experience.company"``
        - key with a list index: ``"experience[0].company"``
        - key with a wildcard: ``"experience[*].company"``

    A dot-separated part that does not look like ``word[<digits>]`` or
    ``word[*]`` is kept verbatim as a single key step.

    Args:
        path: Field path string

    Returns:
        The steps in the order they appear in ``path``
    """
    parsed: list[FieldPath] = []

    # Dots that sit inside a [...] pair are not separators.
    for token in re.split(r"\.(?![^\[]*\])", path):
        indexed = re.match(r"^(\w+)\[(\*|\d+)\]$", token)
        if indexed is None:
            parsed.append(FieldPath(type="key", value=token))
            continue

        # "field[0]" / "field[*]" expands to a key step plus a list step.
        name = indexed.group(1)
        selector = indexed.group(2)
        parsed.append(FieldPath(type="key", value=name))
        parsed.append(
            FieldPath(type="wildcard")
            if selector == "*"
            else FieldPath(type="index", value=int(selector))
        )

    return parsed
49
+
50
+
51
def extract_field(data: Any, segments: list[FieldPath]) -> Any:
    """Walk ``data`` along ``segments`` and return what is found there.

    Args:
        data: Source data (dict, list, or scalar)
        segments: Parsed field path segments

    Returns:
        The value at the end of the path. ``None`` when a step cannot be
        applied (wrong container type, index past the end, or a ``None``
        met before the path is finished). A wildcard step returns a list
        with one entry per list item, each being the result of applying
        the remaining steps to that item.
    """
    node = data

    for position, step in enumerate(segments):
        if node is None:
            return None

        kind = step.type

        if kind == "key":
            if not isinstance(node, dict):
                return None
            node = node.get(step.value)  # type: ignore[arg-type]

        elif kind == "index":
            idx = step.value
            if not (isinstance(node, list) and isinstance(idx, int)):
                return None
            if idx >= len(node):
                return None
            node = node[idx]

        elif kind == "wildcard":
            if not isinstance(node, list):
                return None
            tail = segments[position + 1:]
            if not tail:
                # Wildcard is the last step: the list itself is the result.
                return node
            # Fan out: the rest of the path is resolved per list item.
            return [extract_field(element, tail) for element in node]

    return node
92
+
93
+
94
def filter_fields(data: dict[str, Any], field_paths: list[str]) -> dict[str, Any]:
    """Build a dictionary holding only the requested fields of ``data``.

    Each entry of ``field_paths`` may be a plain key or a nested path using
    dot notation, array indexing, or wildcards.

    Args:
        data: Source dictionary
        field_paths: List of field path strings

    Returns:
        ``data`` itself (not a copy) when ``field_paths`` is empty, otherwise
        a new dictionary. Plain keys are copied when present in ``data``;
        nested paths are extracted and written through ``_set_nested``, and
        are skipped when the extracted value is ``None``.
    """
    if not field_paths:
        return data

    selected: dict[str, Any] = {}

    for raw_path in field_paths:
        steps = parse_field_path(raw_path)
        is_single = len(steps) == 1

        if is_single and steps[0].type == "key":
            # Plain top-level key: copy it across when the source has it.
            name = steps[0].value
            if isinstance(name, str) and name in data:
                selected[name] = data[name]
            continue

        # Anything else is resolved by walking the path through the data.
        found = extract_field(data, steps)
        if found is None:
            continue

        root = steps[0].value
        if not isinstance(root, str):
            continue

        if is_single:
            selected[root] = found
        else:
            _set_nested(selected, steps, found)

    return selected
134
+
135
+
136
def _set_nested(target: dict[str, Any], segments: list[FieldPath], value: Any) -> None:
    """Store ``value`` in ``target`` at the place described by ``segments``.

    Paths made only of key steps are rebuilt as nested dictionaries, e.g.
    ``a.b`` becomes ``target["a"]["b"] = value``.

    Paths that contain an index or wildcard step are stored flat at the top
    level of ``target`` under their dotted form (``experience.0.company``,
    ``experience.*.company``). List positions cannot be rebuilt without
    inventing placeholder items, and a flat key keeps several selections
    from the same list from overwriting each other. Previously such paths
    left an empty list in ``target`` and silently dropped ``value``.

    Args:
        target: Dictionary that receives the value (modified in place)
        segments: Parsed field path segments
        value: Value to store
    """
    if not segments:
        return

    dotted = ".".join(
        "*" if step.type == "wildcard" else str(step.value)
        for step in segments
    )

    # Any list step (or a malformed key step) means no nested rebuild.
    if any(step.type != "key" or not isinstance(step.value, str) for step in segments):
        target[dotted] = value
        return

    current = target
    for step in segments[:-1]:
        key = step.value
        if key not in current:
            current[key] = {}
        child = current[key]
        if not isinstance(child, dict):
            # Something that is not a dict already lives here; keep it and
            # fall back to the flat form instead of losing the new value.
            target[dotted] = value
            return
        current = child

    current[segments[-1].value] = value
167
+
168
+
169
def exclude_fields(data: dict[str, Any], field_names: list[str]) -> dict[str, Any]:
    """Return a deep copy of ``data`` without the named fields.

    A name containing dots is treated as a path through nested
    dictionaries; only its last component is deleted. Names that do not
    resolve are ignored.

    Args:
        data: Source dictionary (left untouched)
        field_names: List of field names to remove

    Returns:
        Dictionary with specified fields removed
    """
    import copy

    pruned = copy.deepcopy(data)

    for name in field_names:
        if "." not in name:
            pruned.pop(name, None)
            continue

        *parents, leaf = name.split(".")
        node = pruned
        reachable = True
        for step in parents:
            if not (isinstance(node, dict) and step in node):
                reachable = False
                break
            node = node[step]

        # Delete only when every parent existed and the leaf is present.
        if reachable and isinstance(node, dict) and leaf in node:
            del node[leaf]

    return pruned
202
+
203
+
204
+ # Built-in field presets
205
+ BUILT_IN_PRESETS: dict[str, list[str]] = {
206
+ "minimal": ["name", "full_name", "headline", "url", "linkedin_url"],
207
+ "contact": ["name", "full_name", "email", "phone", "linkedin_url", "twitter_url"],
208
+ "recruiting": [
209
+ "name", "full_name", "headline", "company", "current_company",
210
+ "experience", "skills", "location", "linkedin_url",
211
+ ],
212
+ }
213
+
214
+
215
+ def resolve_fields_preset(preset_name: str) -> list[str] | None:
216
+ """Resolve a field preset name to a list of fields.
217
+
218
+ Checks built-in presets first, then user config.
219
+
220
+ Args:
221
+ preset_name: Name of the preset
222
+
223
+ Returns:
224
+ List of field names, or None if preset not found
225
+ """
226
+ # Check built-in presets
227
+ if preset_name in BUILT_IN_PRESETS:
228
+ return BUILT_IN_PRESETS[preset_name]
229
+
230
+ # Check user config
231
+ try:
232
+ from anysite.config.settings import get_config_value
233
+
234
+ custom = get_config_value(f"presets.{preset_name}")
235
+ if isinstance(custom, str):
236
+ return [f.strip() for f in custom.split(",")]
237
+ if isinstance(custom, list):
238
+ return custom
239
+ except Exception:
240
+ pass
241
+
242
+ return None
anysite/utils/retry.py ADDED
@@ -0,0 +1,109 @@
1
+ """Retry logic with exponential backoff."""
2
+
3
+ import asyncio
4
+ import random
5
+ from collections.abc import Awaitable, Callable
6
+ from dataclasses import dataclass, field
7
+ from typing import Any
8
+
9
+ from anysite.api.errors import (
10
+ NetworkError,
11
+ RateLimitError,
12
+ ServerError,
13
+ )
14
+ from anysite.api.errors import (
15
+ TimeoutError as AnysiteTimeoutError,
16
+ )
17
+
18
+
19
def _default_retry_on() -> tuple[type[Exception], ...]:
    """Return the exception types that are retried by default."""
    return (
        RateLimitError,
        ServerError,
        NetworkError,
        AnysiteTimeoutError,
    )


@dataclass
class RetryConfig:
    """Settings that control how failed calls are retried."""

    # Total number of calls made, the first one included.
    max_attempts: int = 3
    # Backoff before the first retry, in seconds.
    initial_delay: float = 1.0
    # Upper bound for a single backoff, in seconds.
    max_delay: float = 60.0
    # Multiplier applied to the backoff for each further attempt.
    exponential_base: float = 2.0
    # When true, each backoff is scaled by a random factor.
    jitter: bool = True
    # Exception types that qualify for a retry.
    retry_on: tuple[type[Exception], ...] = field(default_factory=_default_retry_on)
36
+
37
+
38
def calculate_delay(attempt: int, config: RetryConfig) -> float:
    """Calculate delay for retry attempt using exponential backoff.

    The delay is ``initial_delay * exponential_base ** attempt``, capped at
    ``max_delay``. With jitter enabled the capped value is scaled by a
    random factor between 0.5 and 1.0.

    Args:
        attempt: Zero-based attempt number
        config: Retry configuration

    Returns:
        Delay in seconds
    """
    try:
        delay = config.initial_delay * (config.exponential_base ** attempt)
    except OverflowError:
        # For large attempt numbers the power no longer fits in a float.
        # The cap would apply anyway, so treat overflow as "cap reached".
        delay = config.max_delay
    delay = min(delay, config.max_delay)

    if config.jitter:
        delay = delay * (0.5 + random.random() * 0.5)

    return delay
55
+
56
+
57
def should_retry(exception: Exception, config: RetryConfig) -> bool:
    """Tell whether ``exception`` qualifies for another attempt.

    Args:
        exception: The exception to check
        config: Retry configuration

    Returns:
        True when the exception is an instance of one of the types listed
        in ``config.retry_on``
    """
    retryable_types = config.retry_on
    return isinstance(exception, retryable_types)
68
+
69
+
70
+ async def retry_async(
71
+ func: Callable[..., Awaitable[Any]],
72
+ config: RetryConfig | None = None,
73
+ *args: Any,
74
+ **kwargs: Any,
75
+ ) -> Any:
76
+ """Execute an async function with retry logic.
77
+
78
+ Args:
79
+ func: Async function to execute
80
+ config: Retry configuration (uses defaults if None)
81
+ *args: Positional arguments for func
82
+ **kwargs: Keyword arguments for func
83
+
84
+ Returns:
85
+ Result of the function
86
+
87
+ Raises:
88
+ The last exception if all retries are exhausted
89
+ """
90
+ if config is None:
91
+ config = RetryConfig()
92
+
93
+ last_error: Exception | None = None
94
+
95
+ for attempt in range(config.max_attempts):
96
+ try:
97
+ return await func(*args, **kwargs)
98
+ except Exception as e:
99
+ last_error = e
100
+ if not should_retry(e, config):
101
+ raise
102
+
103
+ if attempt < config.max_attempts - 1:
104
+ delay = calculate_delay(attempt, config)
105
+ await asyncio.sleep(delay)
106
+ else:
107
+ raise
108
+
109
+ raise last_error # type: ignore[misc]