anysite-cli 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. anysite/__init__.py +4 -0
  2. anysite/__main__.py +6 -0
  3. anysite/api/__init__.py +21 -0
  4. anysite/api/client.py +271 -0
  5. anysite/api/errors.py +137 -0
  6. anysite/api/schemas.py +333 -0
  7. anysite/batch/__init__.py +1 -0
  8. anysite/batch/executor.py +176 -0
  9. anysite/batch/input.py +160 -0
  10. anysite/batch/rate_limiter.py +98 -0
  11. anysite/cli/__init__.py +1 -0
  12. anysite/cli/config.py +176 -0
  13. anysite/cli/executor.py +388 -0
  14. anysite/cli/options.py +249 -0
  15. anysite/config/__init__.py +11 -0
  16. anysite/config/paths.py +46 -0
  17. anysite/config/settings.py +187 -0
  18. anysite/dataset/__init__.py +37 -0
  19. anysite/dataset/analyzer.py +268 -0
  20. anysite/dataset/cli.py +644 -0
  21. anysite/dataset/collector.py +686 -0
  22. anysite/dataset/db_loader.py +248 -0
  23. anysite/dataset/errors.py +30 -0
  24. anysite/dataset/exporters.py +121 -0
  25. anysite/dataset/history.py +153 -0
  26. anysite/dataset/models.py +245 -0
  27. anysite/dataset/notifications.py +87 -0
  28. anysite/dataset/scheduler.py +107 -0
  29. anysite/dataset/storage.py +171 -0
  30. anysite/dataset/transformer.py +213 -0
  31. anysite/db/__init__.py +38 -0
  32. anysite/db/adapters/__init__.py +1 -0
  33. anysite/db/adapters/base.py +158 -0
  34. anysite/db/adapters/postgres.py +201 -0
  35. anysite/db/adapters/sqlite.py +183 -0
  36. anysite/db/cli.py +709 -0
  37. anysite/db/config.py +92 -0
  38. anysite/db/manager.py +166 -0
  39. anysite/db/operations/__init__.py +1 -0
  40. anysite/db/operations/insert.py +199 -0
  41. anysite/db/operations/query.py +43 -0
  42. anysite/db/schema/__init__.py +1 -0
  43. anysite/db/schema/inference.py +213 -0
  44. anysite/db/schema/types.py +71 -0
  45. anysite/db/utils/__init__.py +1 -0
  46. anysite/db/utils/sanitize.py +99 -0
  47. anysite/main.py +498 -0
  48. anysite/models/__init__.py +1 -0
  49. anysite/output/__init__.py +11 -0
  50. anysite/output/console.py +45 -0
  51. anysite/output/formatters.py +301 -0
  52. anysite/output/templates.py +76 -0
  53. anysite/py.typed +0 -0
  54. anysite/streaming/__init__.py +1 -0
  55. anysite/streaming/progress.py +121 -0
  56. anysite/streaming/writer.py +130 -0
  57. anysite/utils/__init__.py +1 -0
  58. anysite/utils/fields.py +242 -0
  59. anysite/utils/retry.py +109 -0
  60. anysite_cli-0.1.2.dist-info/METADATA +455 -0
  61. anysite_cli-0.1.2.dist-info/RECORD +64 -0
  62. anysite_cli-0.1.2.dist-info/WHEEL +4 -0
  63. anysite_cli-0.1.2.dist-info/entry_points.txt +2 -0
  64. anysite_cli-0.1.2.dist-info/licenses/LICENSE +21 -0
anysite/cli/options.py ADDED
@@ -0,0 +1,249 @@
1
+ """Shared CLI option definitions for all platform commands."""
2
+
3
+ import sys
4
+ from enum import Enum
5
+ from pathlib import Path
6
+ from typing import Annotated
7
+
8
+ import typer
9
+
10
+ from anysite.output.formatters import OutputFormat
11
+
12
# === Phase 1 options: core output controls ===
# These aliases were extracted from the individual CLI modules so that every
# command declares the same flags with the same help text.

# Output format selector; the format name is matched case-insensitively.
FormatOption = Annotated[
    OutputFormat,
    typer.Option("--format", "-f", help="Output format", case_sensitive=False),
]

# Optional comma-separated list of fields to keep.
FieldsOption = Annotated[
    str | None,
    typer.Option("--fields", help="Comma-separated list of fields to include"),
]

# Optional path of a file that receives the output.
OutputOption = Annotated[
    Path | None,
    typer.Option("--output", "-o", help="Save output to file"),
]

# Boolean flag that suppresses non-data output.
QuietOption = Annotated[
    bool,
    typer.Option("--quiet", "-q", help="Suppress non-data output"),
]
46
+
47
+
48
# === Phase 2 options shown in the "Output Options" help panel ===

# --- Field selection ---

# Optional comma-separated list of fields to drop from the output.
ExcludeOption = Annotated[
    str | None,
    typer.Option(
        "--exclude",
        rich_help_panel="Output Options",
        help="Comma-separated list of fields to exclude",
    ),
]

# Boolean flag requesting output without indentation.
CompactOption = Annotated[
    bool,
    typer.Option(
        "--compact",
        rich_help_panel="Output Options",
        help="Compact output (no indentation)",
    ),
]

# Optional name of a predefined field set.
FieldsPresetOption = Annotated[
    str | None,
    typer.Option(
        "--fields-preset",
        rich_help_panel="Output Options",
        help="Named field preset (minimal, contact, recruiting)",
    ),
]

# --- Streaming ---

# On/off switch pair (--stream / --no-stream) for JSONL streaming.
StreamOption = Annotated[
    bool,
    typer.Option(
        "--stream/--no-stream",
        rich_help_panel="Output Options",
        help="Stream output as JSONL (one record per line)",
    ),
]

# --- Output destinations ---

# Boolean flag: append to the output file.
AppendOption = Annotated[
    bool,
    typer.Option(
        "--append",
        rich_help_panel="Output Options",
        help="Append to existing output file",
    ),
]

# Optional directory that receives one file per record in batch mode.
OutputDirOption = Annotated[
    Path | None,
    typer.Option(
        "--output-dir",
        rich_help_panel="Output Options",
        help="Output directory (one file per record in batch mode)",
    ),
]

# Filename pattern for batch output; the placeholders are listed in the help.
FilenameTemplateOption = Annotated[
    str,
    typer.Option(
        "--filename-template",
        rich_help_panel="Output Options",
        help="Filename template for batch output ({id}, {username}, {date}, {index})",
    ),
]
118
+
119
+
120
# === Phase 2 options shown in the "Batch Input" help panel ===

# Optional path of a file that supplies the batch inputs.
FromFileOption = Annotated[
    Path | None,
    typer.Option(
        "--from-file",
        rich_help_panel="Batch Input",
        help="Read inputs from file (one per line, or JSONL/CSV)",
    ),
]

# Boolean flag: take the batch inputs from standard input.
StdinOption = Annotated[
    bool,
    typer.Option(
        "--stdin",
        rich_help_panel="Batch Input",
        help="Read inputs from stdin",
    ),
]

# Integer count of requests to run in parallel.
ParallelOption = Annotated[
    int,
    typer.Option(
        "--parallel", "-j",
        rich_help_panel="Batch Input",
        help="Number of parallel requests",
    ),
]

# Pause between requests, in seconds (float).
DelayOption = Annotated[
    float,
    typer.Option(
        "--delay",
        rich_help_panel="Batch Input",
        help="Delay between requests in seconds",
    ),
]
157
+
158
+
159
class ErrorHandling(str, Enum):
    """Strategies a batch operation can apply when an item fails.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value ("stop", "skip", "retry").
    """

    STOP = "stop"
    SKIP = "skip"
    RETRY = "retry"
165
+
166
+
167
# Error-handling mode for batch runs; listed in the "Batch Input" help panel.
OnErrorOption = Annotated[
    ErrorHandling,
    typer.Option(
        "--on-error",
        rich_help_panel="Batch Input",
        help="Error handling mode: stop, skip, or retry",
    ),
]


# === Phase 2 options shown in the "Advanced" help panel ===

# --- Rate limiting ---

# Optional rate-limit expression; the help text shows the accepted shapes.
RateLimitOption = Annotated[
    str | None,
    typer.Option(
        "--rate-limit",
        rich_help_panel="Advanced",
        help="Rate limit (e.g., '10/s', '100/m', '1000/h')",
    ),
]

# --- Progress and feedback ---

# On/off switch pair (--progress / --no-progress); the type also allows None.
ProgressOption = Annotated[
    bool | None,
    typer.Option(
        "--progress/--no-progress",
        rich_help_panel="Advanced",
        help="Show progress bar",
    ),
]

# Boolean flag: print statistics once the run completes.
StatsOption = Annotated[
    bool,
    typer.Option(
        "--stats",
        rich_help_panel="Advanced",
        help="Show statistics after completion",
    ),
]

# Boolean flag: verbose output including debug information.
VerboseOption = Annotated[
    bool,
    typer.Option(
        "--verbose",
        rich_help_panel="Advanced",
        help="Verbose output with debug information",
    ),
]
217
+
218
+
219
+ def parse_fields(fields: str | None) -> list[str] | None:
220
+ """Parse comma-separated fields string.
221
+
222
+ Args:
223
+ fields: Comma-separated field names or None
224
+
225
+ Returns:
226
+ List of field names or None
227
+ """
228
+ if not fields:
229
+ return None
230
+ return [f.strip() for f in fields.split(",")]
231
+
232
+
233
+ def parse_exclude(exclude: str | None) -> list[str] | None:
234
+ """Parse comma-separated exclude fields string.
235
+
236
+ Args:
237
+ exclude: Comma-separated field names to exclude or None
238
+
239
+ Returns:
240
+ List of field names or None
241
+ """
242
+ if not exclude:
243
+ return None
244
+ return [f.strip() for f in exclude.split(",")]
245
+
246
+
247
def is_stdin_piped() -> bool:
    """Check if stdin has piped data.

    Returns:
        True when stdin exists and is not attached to a terminal; False when
        stdin is a terminal, is missing, or has been closed.
    """
    stream = sys.stdin
    # sys.stdin can be None when the process has no standard input.
    if stream is None:
        return False
    try:
        return not stream.isatty()
    except ValueError:
        # isatty() raises ValueError on a closed stream; nothing can be
        # read from it, so report "not piped".
        return False
@@ -0,0 +1,11 @@
1
+ """Configuration module."""
2
+
3
+ from anysite.config.paths import get_config_dir, get_config_path
4
+ from anysite.config.settings import Settings, get_settings
5
+
6
# Names re-exported as the public API of the configuration package.
__all__ = ["Settings", "get_settings", "get_config_dir", "get_config_path"]
@@ -0,0 +1,46 @@
1
+ """Configuration file paths."""
2
+
3
+ import os
4
+ from pathlib import Path
5
+
6
+
7
def get_config_dir() -> Path:
    """Return the directory that holds the anysite configuration.

    Returns:
        ``~/.anysite`` on Unix-like systems. On Windows, ``anysite`` inside
        the directory named by the ``APPDATA`` environment variable, falling
        back to the home directory when that variable is not set.
    """
    on_windows = os.name == "nt"
    if not on_windows:
        # Unix-like (Linux, macOS): hidden directory in the user's home.
        return Path.home() / ".anysite"

    appdata_root = os.environ.get("APPDATA", str(Path.home()))
    return Path(appdata_root) / "anysite"
18
+
19
+
20
def get_config_path() -> Path:
    """Return the location of the configuration file.

    Returns:
        Path of ``config.yaml`` inside the directory returned by
        ``get_config_dir()``.
    """
    config_dir = get_config_dir()
    return config_dir / "config.yaml"
27
+
28
+
29
def get_schema_cache_path() -> Path:
    """Return the location of the schema cache file.

    Returns:
        Path of ``schema.json`` inside the directory returned by
        ``get_config_dir()``.
    """
    config_dir = get_config_dir()
    return config_dir / "schema.json"
36
+
37
+
38
def ensure_config_dir() -> Path:
    """Create the configuration directory if it is missing.

    Missing parent directories are created as well, and an already existing
    directory is not an error.

    Returns:
        Path to the config directory.
    """
    directory = get_config_dir()
    directory.mkdir(exist_ok=True, parents=True)
    return directory
@@ -0,0 +1,187 @@
1
+ """Application settings using Pydantic Settings."""
2
+
3
from functools import lru_cache
from typing import Any

import yaml
from pydantic import Field
from pydantic_settings import (
    BaseSettings,
    PydanticBaseSettingsSource,
    SettingsConfigDict,
)

from anysite.config.paths import get_config_path
11
+
12
+
13
def load_yaml_config() -> dict[str, Any]:
    """Read the YAML configuration file.

    Returns:
        The parsed document, or an empty dict when the file does not exist
        or parses to a falsy value (for example an empty file).
    """
    path = get_config_path()
    if not path.exists():
        return {}

    with open(path) as handle:
        loaded = yaml.safe_load(handle)
    return loaded or {}
20
+
21
+
22
# Short keys accepted under the YAML ``defaults:`` mapping, and the Settings
# field each of them populates.
_YAML_DEFAULTS_ALIASES: dict[str, str] = {
    "format": "default_format",
    "count": "default_count",
    "timeout": "timeout",
    "parallel": "default_parallel",
    "rate_limit": "default_rate_limit",
    "auto_stream_threshold": "auto_stream_threshold",
}


def _flatten_yaml_config(raw: Any) -> dict[str, Any]:
    """Turn the parsed YAML document into a flat ``field name -> value`` dict.

    The nested ``defaults:`` mapping is expanded through
    ``_YAML_DEFAULTS_ALIASES``. A field given at the top level wins over the
    same field given under ``defaults:``. A document or ``defaults:`` value
    that is not a mapping (e.g. an empty ``defaults:`` key) is ignored
    instead of raising.
    """
    if not isinstance(raw, dict):
        return {}
    flat = dict(raw)
    defaults = flat.pop("defaults", None)
    if isinstance(defaults, dict):
        for short_key, field_name in _YAML_DEFAULTS_ALIASES.items():
            if short_key in defaults:
                flat.setdefault(field_name, defaults[short_key])
    return flat


class _YamlConfigSource(PydanticBaseSettingsSource):
    """Settings source that reads values from the YAML config file."""

    def __init__(self, settings_cls: type[BaseSettings]) -> None:
        super().__init__(settings_cls)
        self._values = _flatten_yaml_config(load_yaml_config())

    def get_field_value(self, field: Any, field_name: str) -> tuple[Any, str, bool]:
        # Values are taken as parsed by YAML; no further decoding is needed.
        return self._values.get(field_name), field_name, False

    def __call__(self) -> dict[str, Any]:
        # Only report fields that are present in the file, so absent keys
        # fall through to the field defaults.
        return {
            name: self._values[name]
            for name in self.settings_cls.model_fields
            if name in self._values
        }


class Settings(BaseSettings):
    """Application settings.

    Priority (highest to lowest):
    1. CLI arguments (handled separately) / keyword arguments
    2. Environment variables (ANYSITE_*), then the .env file
    3. Config file (~/.anysite/config.yaml)
    4. Default values

    The config file is registered as its own settings source below the
    environment sources. Passing its values as constructor keyword arguments
    would rank them above environment variables and invert items 2 and 3.
    """

    model_config = SettingsConfigDict(
        env_prefix="ANYSITE_",
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # API settings
    api_key: str | None = Field(
        default=None,
        description="Anysite API key",
    )
    base_url: str = Field(
        default="https://api.anysite.io",
        description="Anysite API base URL",
    )
    timeout: int = Field(
        default=300,
        ge=20,
        le=1500,
        description="API request timeout in seconds",
    )

    # CLI defaults
    default_format: str = Field(
        default="json",
        description="Default output format (json, jsonl, csv, table)",
    )
    default_count: int = Field(
        default=10,
        ge=1,
        le=1000,
        description="Default count for search results",
    )

    # Phase 2: Batch defaults
    default_parallel: int = Field(
        default=1,
        ge=1,
        le=50,
        description="Default parallel concurrency for batch operations",
    )
    default_rate_limit: str | None = Field(
        default=None,
        description="Default rate limit (e.g., '10/s', '60/m')",
    )
    auto_stream_threshold: int = Field(
        default=100,
        ge=1,
        le=1000,
        description="Auto-enable streaming when count exceeds this threshold",
    )

    # Debug
    debug: bool = Field(
        default=False,
        description="Enable debug mode",
    )

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """Order the settings sources: kwargs > env > .env > secrets > YAML."""
        return (
            init_settings,
            env_settings,
            dotenv_settings,
            file_secret_settings,
            _YamlConfigSource(settings_cls),
        )
114
+
115
+
116
@lru_cache
def get_settings() -> Settings:
    """Return the shared Settings instance.

    The result is memoised by ``lru_cache``: Settings is constructed on the
    first call and reused until ``get_settings.cache_clear()`` is called.
    """
    settings = Settings()
    return settings
120
+
121
+
122
def save_config(key: str, value: Any) -> None:
    """Save a configuration value to the YAML config file.

    The existing file is loaded, the value is set (creating nested mappings
    for dotted keys as needed), the file is rewritten, and the cached
    settings are cleared.

    Args:
        key: Configuration key (e.g., 'api_key', 'defaults.format')
        value: Value to save

    Raises:
        TypeError: If the existing config file does not contain a mapping at
            its top level.
    """
    from anysite.config.paths import ensure_config_dir, get_config_path

    ensure_config_dir()
    config_path = get_config_path()

    # Load existing config
    config: dict[str, Any] = {}
    if config_path.exists():
        with open(config_path) as f:
            config = yaml.safe_load(f) or {}
    if not isinstance(config, dict):
        raise TypeError(
            f"Config file {config_path} must contain a mapping at the top level"
        )

    # Walk the dotted key (e.g., 'defaults.format'); a plain key has no
    # parents and is assigned directly on the top-level mapping.
    *parents, leaf = key.split(".")
    node = config
    for part in parents:
        child = node.get(part)
        if not isinstance(child, dict):
            # The parent is missing, or present but not a mapping (e.g. an
            # empty ``defaults:`` entry loads as None): start a fresh
            # mapping rather than failing on item assignment.
            child = {}
            node[part] = child
        node = child
    node[leaf] = value

    # Save config
    with open(config_path, "w") as f:
        yaml.dump(config, f, default_flow_style=False, allow_unicode=True)

    # Clear settings cache so the next get_settings() call reloads the file
    get_settings.cache_clear()
158
+
159
+
160
def get_config_value(key: str) -> Any:
    """Look up a single value in the YAML config file.

    Args:
        key: Configuration key (e.g., 'api_key', 'defaults.format')

    Returns:
        The stored value, or None when the key, or any segment of a dotted
        key, is not present.
    """
    config = load_yaml_config()

    # Plain key: direct lookup on the top-level mapping.
    if "." not in key:
        return config.get(key)

    # Dotted key: descend one segment at a time.
    node: Any = config
    for segment in key.split("."):
        if not isinstance(node, dict) or segment not in node:
            return None
        node = node[segment]
    return node
183
+
184
+
185
def list_config() -> dict[str, Any]:
    """Return everything stored in the YAML config file.

    Returns:
        The result of ``load_yaml_config()``, unchanged.
    """
    config = load_yaml_config()
    return config
@@ -0,0 +1,37 @@
1
+ """Dataset subsystem for multi-source data collection and analysis."""
2
+
3
+ from typing import NoReturn
4
+
5
+
6
def check_data_deps() -> None:
    """Verify that the optional data packages can be imported.

    Tries to import ``duckdb`` and ``pyarrow``. When at least one import
    fails, the names of the failing packages are handed to
    ``_missing_deps_error``, which does not return.
    """
    missing: list[str] = []

    for package in ("duckdb", "pyarrow"):
        try:
            __import__(package)
        except ImportError:
            missing.append(package)

    if missing:
        _missing_deps_error(missing)
26
+
27
+
28
def _missing_deps_error(missing: list[str]) -> NoReturn:
    """Report missing optional packages on stderr and abort.

    Args:
        missing: Names of the packages that could not be imported.

    Raises:
        typer.Exit: Always, with exit code 1.
    """
    # Imported here rather than at module level, as in the original code.
    import typer

    package_list = ", ".join(missing)
    message = (
        f"Error: Missing required packages: {package_list}\n"
        "Install with: pip install anysite-cli[data]"
    )
    typer.echo(message, err=True)
    raise typer.Exit(1)