anysite-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of anysite-cli might be problematic. Click here for more details.

Files changed (64) hide show
  1. anysite/__init__.py +4 -0
  2. anysite/__main__.py +6 -0
  3. anysite/api/__init__.py +21 -0
  4. anysite/api/client.py +271 -0
  5. anysite/api/errors.py +137 -0
  6. anysite/api/schemas.py +333 -0
  7. anysite/batch/__init__.py +1 -0
  8. anysite/batch/executor.py +176 -0
  9. anysite/batch/input.py +160 -0
  10. anysite/batch/rate_limiter.py +98 -0
  11. anysite/cli/__init__.py +1 -0
  12. anysite/cli/config.py +176 -0
  13. anysite/cli/executor.py +388 -0
  14. anysite/cli/options.py +249 -0
  15. anysite/config/__init__.py +11 -0
  16. anysite/config/paths.py +46 -0
  17. anysite/config/settings.py +187 -0
  18. anysite/dataset/__init__.py +37 -0
  19. anysite/dataset/analyzer.py +268 -0
  20. anysite/dataset/cli.py +644 -0
  21. anysite/dataset/collector.py +686 -0
  22. anysite/dataset/db_loader.py +248 -0
  23. anysite/dataset/errors.py +30 -0
  24. anysite/dataset/exporters.py +121 -0
  25. anysite/dataset/history.py +153 -0
  26. anysite/dataset/models.py +245 -0
  27. anysite/dataset/notifications.py +87 -0
  28. anysite/dataset/scheduler.py +107 -0
  29. anysite/dataset/storage.py +171 -0
  30. anysite/dataset/transformer.py +213 -0
  31. anysite/db/__init__.py +38 -0
  32. anysite/db/adapters/__init__.py +1 -0
  33. anysite/db/adapters/base.py +158 -0
  34. anysite/db/adapters/postgres.py +201 -0
  35. anysite/db/adapters/sqlite.py +183 -0
  36. anysite/db/cli.py +687 -0
  37. anysite/db/config.py +92 -0
  38. anysite/db/manager.py +166 -0
  39. anysite/db/operations/__init__.py +1 -0
  40. anysite/db/operations/insert.py +199 -0
  41. anysite/db/operations/query.py +43 -0
  42. anysite/db/schema/__init__.py +1 -0
  43. anysite/db/schema/inference.py +213 -0
  44. anysite/db/schema/types.py +71 -0
  45. anysite/db/utils/__init__.py +1 -0
  46. anysite/db/utils/sanitize.py +99 -0
  47. anysite/main.py +498 -0
  48. anysite/models/__init__.py +1 -0
  49. anysite/output/__init__.py +11 -0
  50. anysite/output/console.py +45 -0
  51. anysite/output/formatters.py +301 -0
  52. anysite/output/templates.py +76 -0
  53. anysite/py.typed +0 -0
  54. anysite/streaming/__init__.py +1 -0
  55. anysite/streaming/progress.py +121 -0
  56. anysite/streaming/writer.py +130 -0
  57. anysite/utils/__init__.py +1 -0
  58. anysite/utils/fields.py +242 -0
  59. anysite/utils/retry.py +109 -0
  60. anysite_cli-0.1.0.dist-info/METADATA +437 -0
  61. anysite_cli-0.1.0.dist-info/RECORD +64 -0
  62. anysite_cli-0.1.0.dist-info/WHEEL +4 -0
  63. anysite_cli-0.1.0.dist-info/entry_points.txt +2 -0
  64. anysite_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,301 @@
1
+ """Output formatters for different formats."""
2
+
3
+ import csv
4
+ import io
5
+ from enum import Enum
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ import orjson
10
+ from rich.table import Table
11
+
12
+ from anysite.output.console import console
13
+
14
+
15
class OutputFormat(str, Enum):
    """Supported output formats.

    The enum mixes in ``str``, so every member is also a string equal to
    its lowercase value.
    """

    # A single JSON document.
    JSON = "json"
    # Newline-delimited JSON: one JSON object per line.
    JSONL = "jsonl"
    # Comma-separated values with a header row.
    CSV = "csv"
    # Rich table printed to the console.
    TABLE = "table"
22
+
23
+
24
def filter_fields(data: dict[str, Any], fields: list[str]) -> dict[str, Any]:
    """Filter dictionary to only include specified fields.

    Supports nested fields with dot notation (e.g., 'experience.company').
    A numeric path segment indexes into a list (e.g., 'experience.0.company');
    the selected value is still stored in the result as nested dictionaries
    keyed by the path segments (``{"experience": {"0": {"company": ...}}}``).

    Args:
        data: Source dictionary
        fields: List of field names to include

    Returns:
        Filtered dictionary. ``data`` itself is returned unchanged when
        ``fields`` is empty. Nested fields that are missing or resolve to
        ``None`` are omitted; plain (undotted) fields are copied whenever the
        key exists, even if the value is ``None``.
    """
    if not fields:
        return data

    result: dict[str, Any] = {}
    for field in fields:
        if "." not in field:
            if field in data:
                result[field] = data[field]
            continue

        # Walk the dotted path through dicts and (by numeric index) lists.
        parts = field.split(".")
        value: Any = data
        for part in parts:
            if isinstance(value, dict):
                value = value.get(part)
            elif isinstance(value, list) and part.isdecimal():
                # isdecimal() only accepts characters that int() can parse.
                idx = int(part)
                value = value[idx] if idx < len(value) else None
            else:
                value = None
                break
        if value is None:
            continue

        # Rebuild the same path inside the result.
        current: Any = result
        for part in parts[:-1]:
            if part not in current:
                current[part] = {}
            current = current[part]
            if not isinstance(current, dict):
                # An ancestor was already selected as a whole non-dict value
                # (e.g. a list), so it already contains this leaf. Indexing
                # into it with a string key would raise TypeError.
                break
        else:
            current[parts[-1]] = value

    return result
66
+
67
+
68
def flatten_for_csv(data: dict[str, Any], prefix: str = "") -> dict[str, Any]:
    """Collapse a nested dictionary into a single level for CSV output.

    Nested dictionaries contribute dot-joined keys. Lists made only of
    scalars are joined with "; " (``None`` entries are skipped). Any other
    list contributes a ``<key>_count`` entry plus its first three items
    under ``<key>_<index>``.

    Args:
        data: Nested dictionary
        prefix: Prefix for nested keys

    Returns:
        Flattened dictionary with dot-notation keys
    """
    scalar_types = (str, int, float, bool, type(None))
    flat: dict[str, Any] = {}

    for name, val in data.items():
        path = name if not prefix else f"{prefix}.{name}"

        if isinstance(val, dict):
            flat.update(flatten_for_csv(val, path))
            continue

        if not isinstance(val, list):
            flat[path] = val
            continue

        if all(isinstance(entry, scalar_types) for entry in val):
            # Simple list: one cell holding the joined values.
            flat[path] = "; ".join(str(entry) for entry in val if entry is not None)
            continue

        # Complex list: record its length and expand at most three items.
        flat[f"{path}_count"] = len(val)
        for position, entry in enumerate(val[:3]):
            child_key = f"{path}_{position}"
            if isinstance(entry, dict):
                flat.update(flatten_for_csv(entry, child_key))
            else:
                flat[child_key] = entry

    return flat
101
+
102
+
103
def format_json(data: Any, indent: bool = True) -> str:
    """Serialize data to a JSON string.

    Args:
        data: Data to format
        indent: Pretty-print with two-space indentation when true

    Returns:
        JSON string
    """
    flags = 0
    if indent:
        flags = orjson.OPT_INDENT_2
    encoded = orjson.dumps(data, option=flags)
    return encoded.decode("utf-8")
115
+
116
+
117
def format_jsonl(data: list[dict[str, Any]]) -> str:
    """Serialize records as newline-delimited JSON (JSONL).

    Args:
        data: List of dictionaries

    Returns:
        JSONL string with one JSON object per line (no trailing newline)
    """
    return "\n".join(orjson.dumps(record).decode("utf-8") for record in data)
128
+
129
+
130
def format_csv_output(data: list[dict[str, Any]], fields: list[str] | None = None) -> str:
    """Render records as CSV text.

    Each record is flattened first. The columns are ``fields`` when given;
    otherwise they are every flattened key in first-seen order. Keys that
    are not among the columns are dropped.

    Args:
        data: List of dictionaries
        fields: Optional list of fields to include

    Returns:
        CSV string with headers, or an empty string when ``data`` is empty
    """
    if not data:
        return ""

    rows = [flatten_for_csv(record) for record in data]

    if fields:
        columns = fields
    else:
        # A dict keeps insertion order, giving a stable, de-duplicated header.
        seen: dict[str, None] = {}
        for row in rows:
            for column in row:
                seen.setdefault(column, None)
        columns = list(seen)

    buffer = io.StringIO()
    sheet = csv.DictWriter(buffer, fieldnames=columns, extrasaction="ignore")
    sheet.writeheader()
    sheet.writerows(rows)
    return buffer.getvalue()
158
+
159
+
160
def format_table_output(data: list[dict[str, Any]], fields: list[str] | None = None) -> None:
    """Format data as a Rich table and print to console.

    A single record is shown vertically as Field/Value rows. Several records
    are flattened and shown as a grid, limited to ``fields`` when given or
    otherwise to at most ten columns with common fields first.

    All text taken from the data (keys, values, column headers) is escaped
    before it reaches Rich, so square brackets in API data are printed
    literally instead of being parsed as console markup.

    Args:
        data: List of dictionaries
        fields: Optional list of fields to include
    """
    # Local import: rich is already a dependency of this module.
    from rich.markup import escape

    if not data:
        console.print("[dim]No results[/dim]")
        return

    # For single item, display vertically
    if len(data) == 1:
        table = Table(show_header=True, header_style="bold")
        table.add_column("Field", style="cyan")
        table.add_column("Value")

        item = data[0]
        if fields:
            item = filter_fields(item, fields)

        for key, value in item.items():
            if value is None:
                # Intentional markup: render missing values dimmed.
                cell = "[dim]null[/dim]"
            else:
                if isinstance(value, (dict, list)):
                    value = format_json(value, indent=False)
                cell = escape(str(value))
            table.add_row(escape(str(key)), cell)

        console.print(table)
        return

    # For multiple items, display as grid
    flattened = [flatten_for_csv(item) for item in data]

    if fields:
        headers = fields
    else:
        # Select most important fields (limit columns for readability)
        all_keys = list(dict.fromkeys(key for item in flattened for key in item))
        priority_fields = ["name", "full_name", "headline", "title", "company", "url", "followers"]
        headers = [f for f in priority_fields if f in all_keys]
        remaining = [f for f in all_keys if f not in headers]
        headers.extend(remaining[: 10 - len(headers)])

    table = Table(show_header=True, header_style="bold")
    for header in headers:
        table.add_column(escape(str(header)))

    for item in flattened:
        row = []
        for header in headers:
            value = item.get(header, "")
            if isinstance(value, (dict, list)):
                # Nested values that survived flattening are abbreviated.
                value = "..."
            row.append(escape(str(value)) if value is not None else "")
        table.add_row(*row)

    console.print(table)
218
+
219
+
220
def format_output(
    data: Any,
    output_format: OutputFormat,
    fields: list[str] | None = None,
    output_file: Path | None = None,
    quiet: bool = False,
    exclude: list[str] | None = None,
    compact: bool = False,
    append: bool = False,
) -> None:
    """Render data in the requested format and emit it.

    Args:
        data: Data to format (usually list of dicts from API); a non-list
            value is wrapped in a one-element list
        output_format: Output format (json, jsonl, csv, table)
        fields: Optional list of fields to include
        output_file: Optional file path to write output
        quiet: Suppress non-data output
        exclude: Optional list of fields to exclude
        compact: Use compact output (no indentation)
        append: Append to existing file
    """
    records = data if isinstance(data, list) else [data]

    if fields:
        records = [filter_fields(record, fields) for record in records]

    if exclude:
        from anysite.utils.fields import exclude_fields

        records = [exclude_fields(record, exclude) for record in records]

    # A table is only rendered on the console; with an output file it falls
    # through to the JSON branch below.
    if output_format == OutputFormat.TABLE and not output_file:
        format_table_output(records, fields)
        return

    if output_format == OutputFormat.JSONL:
        rendered = format_jsonl(records)
    elif output_format == OutputFormat.CSV:
        rendered = format_csv_output(records, fields)
    else:
        # JSON, table-to-file fallback, and any unrecognised format.
        rendered = format_json(records, indent=not compact)

    _write_output(rendered, output_file, quiet, append=append)
275
+
276
+
277
+ def _write_output(
278
+ content: str,
279
+ output_file: Path | None,
280
+ quiet: bool,
281
+ append: bool = False,
282
+ ) -> None:
283
+ """Write content to file or stdout.
284
+
285
+ Args:
286
+ content: Content to write
287
+ output_file: Optional file path
288
+ quiet: Suppress messages
289
+ append: Append to existing file
290
+ """
291
+ if output_file:
292
+ mode = "a" if append else "w"
293
+ with open(output_file, mode, encoding="utf-8") as f:
294
+ f.write(content)
295
+ if not quiet:
296
+ from anysite.output.console import print_success
297
+
298
+ action = "appended to" if append else "saved to"
299
+ print_success(f"Output {action} {output_file}")
300
+ else:
301
+ print(content)
@@ -0,0 +1,76 @@
1
+ """Filename template resolution for batch output."""
2
+
3
+ from datetime import datetime
4
+ from typing import Any
5
+
6
+
7
+ class FilenameTemplate:
8
+ """Resolve filename templates with variable substitution.
9
+
10
+ Supported variables:
11
+ {id} - Record ID or input value
12
+ {username} - Username field from record
13
+ {date} - Current date (YYYY-MM-DD)
14
+ {datetime} - Current date and time (YYYY-MM-DD_HH-MM-SS)
15
+ {timestamp} - Unix timestamp
16
+ {index} - Zero-padded index
17
+ """
18
+
19
+ def __init__(self, template: str, extension: str = ".json") -> None:
20
+ """Initialize template.
21
+
22
+ Args:
23
+ template: Template string with {variable} placeholders
24
+ extension: File extension to append
25
+ """
26
+ self.template = template
27
+ self.extension = extension
28
+
29
+ def resolve(
30
+ self,
31
+ record: dict[str, Any] | None = None,
32
+ index: int = 0,
33
+ input_value: str = "",
34
+ ) -> str:
35
+ """Resolve template variables to an actual filename.
36
+
37
+ Args:
38
+ record: Data record (for extracting fields)
39
+ index: Item index in batch
40
+ input_value: Original input value
41
+
42
+ Returns:
43
+ Resolved filename string with extension
44
+ """
45
+ now = datetime.now()
46
+ record = record or {}
47
+
48
+ variables = {
49
+ "id": input_value or record.get("id", record.get("urn", str(index))),
50
+ "username": record.get("username", record.get("user", input_value)),
51
+ "date": now.strftime("%Y-%m-%d"),
52
+ "datetime": now.strftime("%Y-%m-%d_%H-%M-%S"),
53
+ "timestamp": str(int(now.timestamp())),
54
+ "index": f"{index:04d}",
55
+ }
56
+
57
+ filename = self.template
58
+ for key, value in variables.items():
59
+ filename = filename.replace(f"{{{key}}}", str(value))
60
+
61
+ # Sanitize filename
62
+ filename = self._sanitize(filename)
63
+
64
+ # Add extension if not present
65
+ if not any(filename.endswith(ext) for ext in [".json", ".jsonl", ".csv"]):
66
+ filename += self.extension
67
+
68
+ return filename
69
+
70
+ @staticmethod
71
+ def _sanitize(filename: str) -> str:
72
+ """Remove or replace unsafe characters from filename."""
73
+ unsafe = '<>:"/\\|?*'
74
+ for char in unsafe:
75
+ filename = filename.replace(char, "_")
76
+ return filename.strip(". ")
anysite/py.typed ADDED
File without changes
@@ -0,0 +1 @@
1
+ """Streaming output modules."""
@@ -0,0 +1,121 @@
1
+ """Progress tracking with Rich progress bars."""
2
+
3
+ import sys
4
+ import time
5
+
6
+ from rich.progress import (
7
+ BarColumn,
8
+ MofNCompleteColumn,
9
+ Progress,
10
+ SpinnerColumn,
11
+ TextColumn,
12
+ TimeElapsedColumn,
13
+ TimeRemainingColumn,
14
+ )
15
+
16
+
17
+ class ProgressTracker:
18
+ """Rich progress bar wrapper for tracking long operations.
19
+
20
+ Automatically shows/hides based on terminal detection and quiet mode.
21
+ """
22
+
23
+ def __init__(
24
+ self,
25
+ total: int | None = None,
26
+ description: str = "Processing...",
27
+ show: bool | None = None,
28
+ quiet: bool = False,
29
+ ) -> None:
30
+ """Initialize progress tracker.
31
+
32
+ Args:
33
+ total: Total number of items (None for indeterminate)
34
+ description: Description text
35
+ show: Force show/hide (None = auto-detect)
36
+ quiet: Suppress all output
37
+ """
38
+ self.total = total
39
+ self.description = description
40
+ self.quiet = quiet
41
+ self._completed = 0
42
+ self._start_time = time.monotonic()
43
+ self._progress: Progress | None = None
44
+
45
+ # Determine if we should show progress
46
+ if quiet:
47
+ self._should_show = False
48
+ elif show is not None:
49
+ self._should_show = show
50
+ else:
51
+ # Auto: show if stdout is a terminal
52
+ self._should_show = sys.stderr.isatty()
53
+
54
+ def start(self) -> None:
55
+ """Start the progress bar."""
56
+ if not self._should_show:
57
+ return
58
+
59
+ columns = [
60
+ SpinnerColumn(),
61
+ TextColumn("[bold blue]{task.description}"),
62
+ BarColumn(),
63
+ MofNCompleteColumn(),
64
+ TimeElapsedColumn(),
65
+ TimeRemainingColumn(),
66
+ ]
67
+
68
+ self._progress = Progress(*columns, transient=True)
69
+ self._progress.start()
70
+ self._task_id = self._progress.add_task(
71
+ self.description,
72
+ total=self.total,
73
+ )
74
+
75
+ def update(self, n: int = 1) -> None:
76
+ """Update progress by n items.
77
+
78
+ Args:
79
+ n: Number of items completed
80
+ """
81
+ self._completed += n
82
+ if self._progress is not None:
83
+ self._progress.update(self._task_id, advance=n)
84
+
85
+ def set_status(self, text: str) -> None:
86
+ """Update the description text.
87
+
88
+ Args:
89
+ text: New description
90
+ """
91
+ self.description = text
92
+ if self._progress is not None:
93
+ self._progress.update(self._task_id, description=text)
94
+
95
+ def finish(self) -> None:
96
+ """Complete the progress bar."""
97
+ if self._progress is not None:
98
+ self._progress.stop()
99
+ self._progress = None
100
+
101
+ def get_stats(self) -> dict[str, float | int]:
102
+ """Get execution statistics.
103
+
104
+ Returns:
105
+ Dictionary with timing and throughput stats
106
+ """
107
+ elapsed = time.monotonic() - self._start_time
108
+ rate = self._completed / elapsed if elapsed > 0 else 0
109
+
110
+ return {
111
+ "total": self._completed,
112
+ "elapsed_seconds": round(elapsed, 2),
113
+ "records_per_second": round(rate, 1),
114
+ }
115
+
116
+ def __enter__(self) -> "ProgressTracker":
117
+ self.start()
118
+ return self
119
+
120
+ def __exit__(self, *args: object) -> None:
121
+ self.finish()
@@ -0,0 +1,130 @@
1
+ """Streaming record writer for outputting records one at a time."""
2
+
3
+ import sys
4
+ from pathlib import Path
5
+ from typing import IO, Any
6
+
7
+ import orjson
8
+
9
+ from anysite.output.formatters import OutputFormat
10
+ from anysite.utils.fields import exclude_fields, filter_fields
11
+
12
+
13
class StreamingWriter:
    """Write records one at a time to stdout or a file.

    Supports JSONL (primary) and CSV streaming output. Any other format is
    written as JSONL.
    """

    def __init__(
        self,
        output: Path | None = None,
        format: OutputFormat = OutputFormat.JSONL,
        fields: list[str] | None = None,
        exclude: list[str] | None = None,
        compact: bool = False,
        append: bool = False,
    ) -> None:
        """Initialize streaming writer.

        Args:
            output: Output file path (None = stdout)
            format: Output format (JSONL or CSV)
            fields: Fields to include
            exclude: Fields to exclude
            compact: Compact JSON output (stored; not used by this class)
            append: Append to existing file
        """
        self.output = output
        self.format = format
        self.fields = fields
        self.exclude = exclude
        self.compact = compact
        self.append = append
        self._file: IO[str] | None = None
        self._csv_headers_written = False
        # Column order for CSV rows; fixed when the first record is written.
        self._csv_fieldnames: list[str] = []
        self._count = 0

    def _get_writer(self) -> IO[str]:
        """Get or create the output writer (the file is opened lazily)."""
        if self._file is not None:
            return self._file

        if not self.output:
            return sys.stdout

        mode = "a" if self.append else "w"
        # The csv module emits its own "\r\n" terminators; newline="" stops
        # the text layer translating them again (which yields "\r\r\n" on
        # Windows). Other formats keep the default newline handling.
        newline = "" if self.format == OutputFormat.CSV else None
        self._file = open(  # noqa: SIM115
            self.output, mode, encoding="utf-8", newline=newline
        )
        return self._file

    def _process_record(self, record: dict[str, Any]) -> dict[str, Any]:
        """Apply field filtering to a record."""
        if self.fields:
            record = filter_fields(record, self.fields)
        if self.exclude:
            record = exclude_fields(record, self.exclude)
        return record

    def _existing_csv_header(self) -> list[str]:
        """Return the header row of the file being appended to, if any.

        Returns an empty list when not appending to a file, or when the
        file is missing or empty.
        """
        import csv

        if not (self.append and self.output):
            return []
        try:
            with open(self.output, encoding="utf-8", newline="") as existing:
                return next(csv.reader(existing), [])
        except FileNotFoundError:
            return []

    def _write_csv(self, record: dict[str, Any], writer: IO[str]) -> None:
        """Write one flattened record as a CSV row, with a header if needed."""
        import csv
        import io

        from anysite.output.formatters import flatten_for_csv

        flat = flatten_for_csv(record)
        buffer = io.StringIO()

        if not self._csv_headers_written:
            inherited = self._existing_csv_header()
            if inherited:
                # Appending to a file that already has a header: reuse its
                # columns rather than writing a second header mid-file.
                self._csv_fieldnames = inherited
            else:
                self._csv_fieldnames = list(flat.keys())
            csv_writer = csv.DictWriter(
                buffer, fieldnames=self._csv_fieldnames, extrasaction="ignore"
            )
            if not inherited:
                csv_writer.writeheader()
            self._csv_headers_written = True
        else:
            csv_writer = csv.DictWriter(
                buffer, fieldnames=self._csv_fieldnames, extrasaction="ignore"
            )

        csv_writer.writerow(flat)
        writer.write(buffer.getvalue())

    def write(self, record: dict[str, Any]) -> None:
        """Write a single record and flush it immediately.

        Args:
            record: Dictionary to write
        """
        record = self._process_record(record)
        writer = self._get_writer()

        if self.format == OutputFormat.CSV:
            self._write_csv(record, writer)
        else:
            # JSONL, and the default for every other format when streaming.
            line = orjson.dumps(record).decode("utf-8")
            writer.write(line + "\n")

        writer.flush()
        self._count += 1

    def close(self) -> None:
        """Close the output file if opened (stdout is never closed)."""
        if self._file is not None:
            self._file.close()
            self._file = None

    @property
    def count(self) -> int:
        """Number of records written."""
        return self._count

    def __enter__(self) -> "StreamingWriter":
        return self

    def __exit__(self, *args: object) -> None:
        self.close()
@@ -0,0 +1 @@
1
+ """Utility modules."""