gapless-crypto-clickhouse 7.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. gapless_crypto_clickhouse/__init__.py +147 -0
  2. gapless_crypto_clickhouse/__probe__.py +349 -0
  3. gapless_crypto_clickhouse/api.py +1032 -0
  4. gapless_crypto_clickhouse/clickhouse/__init__.py +17 -0
  5. gapless_crypto_clickhouse/clickhouse/config.py +119 -0
  6. gapless_crypto_clickhouse/clickhouse/connection.py +269 -0
  7. gapless_crypto_clickhouse/clickhouse/schema.sql +98 -0
  8. gapless_crypto_clickhouse/clickhouse/schema_validator.py +312 -0
  9. gapless_crypto_clickhouse/clickhouse_query.py +642 -0
  10. gapless_crypto_clickhouse/collectors/__init__.py +21 -0
  11. gapless_crypto_clickhouse/collectors/binance_public_data_collector.py +1994 -0
  12. gapless_crypto_clickhouse/collectors/clickhouse_bulk_loader.py +446 -0
  13. gapless_crypto_clickhouse/collectors/concurrent_collection_orchestrator.py +407 -0
  14. gapless_crypto_clickhouse/collectors/csv_format_detector.py +123 -0
  15. gapless_crypto_clickhouse/collectors/httpx_downloader.py +395 -0
  16. gapless_crypto_clickhouse/collectors/hybrid_url_generator.py +316 -0
  17. gapless_crypto_clickhouse/exceptions.py +145 -0
  18. gapless_crypto_clickhouse/gap_filling/__init__.py +1 -0
  19. gapless_crypto_clickhouse/gap_filling/safe_file_operations.py +439 -0
  20. gapless_crypto_clickhouse/gap_filling/universal_gap_filler.py +757 -0
  21. gapless_crypto_clickhouse/llms.txt +268 -0
  22. gapless_crypto_clickhouse/probe.py +235 -0
  23. gapless_crypto_clickhouse/py.typed +0 -0
  24. gapless_crypto_clickhouse/query_api.py +374 -0
  25. gapless_crypto_clickhouse/resume/__init__.py +12 -0
  26. gapless_crypto_clickhouse/resume/intelligent_checkpointing.py +383 -0
  27. gapless_crypto_clickhouse/utils/__init__.py +29 -0
  28. gapless_crypto_clickhouse/utils/error_handling.py +202 -0
  29. gapless_crypto_clickhouse/utils/etag_cache.py +194 -0
  30. gapless_crypto_clickhouse/utils/timeframe_constants.py +90 -0
  31. gapless_crypto_clickhouse/utils/timestamp_format_analyzer.py +256 -0
  32. gapless_crypto_clickhouse/utils/timestamp_utils.py +130 -0
  33. gapless_crypto_clickhouse/validation/__init__.py +36 -0
  34. gapless_crypto_clickhouse/validation/csv_validator.py +677 -0
  35. gapless_crypto_clickhouse/validation/models.py +220 -0
  36. gapless_crypto_clickhouse/validation/storage.py +502 -0
  37. gapless_crypto_clickhouse-7.1.0.dist-info/METADATA +1277 -0
  38. gapless_crypto_clickhouse-7.1.0.dist-info/RECORD +40 -0
  39. gapless_crypto_clickhouse-7.1.0.dist-info/WHEEL +4 -0
  40. gapless_crypto_clickhouse-7.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,147 @@
1
+ """
2
+ Gapless Crypto ClickHouse v5.0.0 - ClickHouse-based cryptocurrency data collection with zero-gap guarantee
3
+
4
+ Market Compatibility:
5
+ - USDT SPOT PAIRS (BTCUSDT, ETHUSDT, SOLUSDT, etc.)
6
+ - USDT-margined PERPETUAL FUTURES (BTCUSDT perps, ETHUSDT perps, etc.)
7
+ - Instrument type distinction via `instrument_type` column ('spot' or 'futures')
8
+ - NO delivery futures, NO coin-margined futures
9
+
10
+ Core Features:
11
+ - Data collection via Binance public data repository (22x performance vs API calls)
12
+ - Full 11-column microstructure format with order flow and liquidity metrics
13
+ - Zero gaps guarantee through authentic API-first validation
14
+ - UV-based Python tooling
15
+ - Atomic file operations
16
+ - Complete 13-timeframe support (1s, 1m, 3m, 5m, 15m, 30m, 1h, 2h, 4h, 6h, 8h, 12h, 1d)
17
+ - Ultra-high frequency (1s) to daily (1d) data collection
18
+ - Intelligent monthly-to-daily fallback for seamless recent data access
19
+ - Gap detection and filling with authentic data only
20
+ - CCXT-compatible dual parameter support (timeframe/interval)
21
+ - Backward compatibility with 5-year deprecation period
22
+
23
+ Data Source:
24
+ Binance Spot Market: https://data.binance.vision/data/spot/monthly/klines/
25
+ Market Type: Spot and USDT-margined perpetual futures (no delivery or coin-margined futures)
26
+ Supported Pairs: USDT-quoted pairs exclusively
27
+
28
+ Usage:
29
+ # Function-based API
30
+ import gapless_crypto_clickhouse as gcd
31
+
32
+ # Fetch recent data as standard pandas DataFrame
33
+ df = gcd.fetch_data("BTCUSDT", timeframe="1h", limit=1000)
34
+
35
+ # Standard pandas operations for analysis
36
+ returns = df['close'].pct_change() # Returns calculation
37
+ rolling_vol = df['close'].rolling(20).std() # Rolling volatility
38
+ max_drawdown = (df['close'] / df['close'].cummax() - 1).min() # Drawdown
39
+
40
+ # Resampling with pandas
41
+ df_resampled = df.set_index('date').resample('4H').agg({
42
+ 'open': 'first', 'high': 'max', 'low': 'min',
43
+ 'close': 'last', 'volume': 'sum'
44
+ })
45
+
46
+ # Backward compatibility (legacy interval parameter)
47
+ df = gcd.fetch_data("BTCUSDT", interval="1h", limit=1000) # DeprecationWarning
48
+
49
+ # Download with date range
50
+ df = gcd.download("ETHUSDT", timeframe="4h", start="2024-01-01", end="2024-06-30")
51
+
52
+ # Get available symbols and timeframes
53
+ symbols = gcd.get_supported_symbols()
54
+ timeframes = gcd.get_supported_timeframes()
55
+
56
+ # Fill gaps in existing data
57
+ results = gcd.fill_gaps("./data")
58
+
59
+ # Class-based API (for complex workflows)
60
+ from gapless_crypto_clickhouse import BinancePublicDataCollector, UniversalGapFiller
61
+
62
+ collector = BinancePublicDataCollector()
63
+ result = collector.collect_timeframe_data("1h")
64
+ df = result["dataframe"]
65
+
66
+ Package Relationship:
67
+ This package is a fork of gapless-crypto-data focused on ClickHouse database workflows.
68
+
69
+ For file-based workflows (CSV/Parquet only):
70
+ See https://pypi.org/project/gapless-crypto-data/
71
+
72
+ Migrating from gapless-crypto-data:
73
+ See docs/development/CLI_MIGRATION_GUIDE.md for migration guide.
74
+ Note: This package never had a CLI (Python API only).
75
+
76
+ Supported Symbols (713 perpetual symbols - Spot + Futures Aligned):
77
+ Both spot and futures-um support 713 validated perpetual symbols
78
+ sourced from binance-futures-availability package (95%+ SLA, daily S3 Vision probes).
79
+
80
+ See get_supported_symbols() for complete list.
81
+ Major pairs: BTCUSDT, ETHUSDT, BNBUSDT, SOLUSDT, XRPUSDT, and 708 more.
82
+ """
83
+
84
+ __version__ = "1.0.0"
85
+ __author__ = "Eon Labs"
86
+ __email__ = "terry@eonlabs.com"
87
+
88
+ # Core classes (advanced/power-user API)
89
+ # Enhanced DataFrame for domain-specific operations
90
+ # Convenience functions (simple/intuitive API)
91
+ # API-only probe hooks for AI coding agents
92
+ from . import __probe__, probe
93
+ from .api import (
94
+ InstrumentType, # ADR-0021: Type alias for instrument type hints
95
+ download,
96
+ download_multiple,
97
+ fetch_data,
98
+ fill_gaps,
99
+ get_info,
100
+ get_supported_intervals,
101
+ get_supported_symbols,
102
+ get_supported_timeframes,
103
+ load_parquet,
104
+ save_parquet,
105
+ )
106
+ from .collectors.binance_public_data_collector import BinancePublicDataCollector
107
+ from .exceptions import (
108
+ DataCollectionError,
109
+ GapFillingError,
110
+ GaplessCryptoDataError,
111
+ NetworkError,
112
+ ValidationError,
113
+ )
114
+ from .gap_filling.safe_file_operations import AtomicCSVOperations, SafeCSVMerger
115
+ from .gap_filling.universal_gap_filler import UniversalGapFiller
116
+ from .query_api import query_ohlcv # v6.0.0: Unified query API with auto-ingestion (ADR-0023)
117
+
118
+ __all__ = [
119
+ # Simple function-based API (recommended for most users)
120
+ "query_ohlcv", # v6.0.0: Unified query API with auto-ingestion (ADR-0023)
121
+ "fetch_data",
122
+ "download",
123
+ "download_multiple",
124
+ "get_supported_symbols",
125
+ "get_supported_timeframes",
126
+ "get_supported_intervals", # Legacy compatibility
127
+ "fill_gaps",
128
+ "get_info",
129
+ "save_parquet",
130
+ "load_parquet",
131
+ # Type aliases (v3.2.0 - ADR-0021)
132
+ "InstrumentType", # Literal["spot", "futures-um"]
133
+ # Advanced class-based API (for complex workflows)
134
+ "BinancePublicDataCollector",
135
+ "UniversalGapFiller",
136
+ "AtomicCSVOperations",
137
+ "SafeCSVMerger",
138
+ # Structured exception hierarchy (v3.2.0)
139
+ "GaplessCryptoDataError",
140
+ "DataCollectionError",
141
+ "ValidationError",
142
+ "NetworkError",
143
+ "GapFillingError",
144
+ # AI agent probe hooks (v6.0.0)
145
+ "__probe__",
146
+ "probe",
147
+ ]
@@ -0,0 +1,349 @@
1
+ """
2
+ __probe__.py - API-only probe hooks for AI coding agents
3
+
4
+ Provides deterministic JSON output for effortless AI agent discovery:
5
+ - API map discovery
6
+ - Capabilities detection
7
+ - Task graph generation for phased spawning
8
+ - Zero-file terminal probing
9
+
10
+ Usage:
11
+ import gapless_crypto_clickhouse
12
+ probe = gapless_crypto_clickhouse.__probe__
13
+
14
+ # Initial discovery
15
+ api_map = probe.discover_api()
16
+ capabilities = probe.get_capabilities()
17
+
18
+ # Phased spawning
19
+ task_graph = probe.get_task_graph()
20
+ sub_tasks = probe.generate_uv_cli_tasks(task_graph)
21
+ """
22
+
23
+ import inspect
24
+ from typing import Any, Dict, List, Optional
25
+
26
+ # Internal imports
27
+ from . import api
28
+ from .collectors.binance_public_data_collector import BinancePublicDataCollector
29
+ from .gap_filling.universal_gap_filler import UniversalGapFiller
30
+
31
+
32
class ProbeAPI:
    """Describe the package to AI coding agents using plain Python containers.

    The public methods return dicts assembled from literals and from
    introspection of the ``api`` module, ``BinancePublicDataCollector`` and
    ``UniversalGapFiller``.  ``discover_api``, ``get_capabilities`` and
    ``get_task_graph`` memoise their result in ``self._cache``, so repeated
    calls on the same instance hand back the very same dict object.
    """

    def __init__(self):
        # Memo store; the keys used are "api_map", "capabilities", "task_graph".
        self._cache: Dict[str, Any] = {}

    def discover_api(self) -> Dict[str, Any]:
        """
        Build (once) and return the API map for AI agents.

        Returns:
            Dict with "metadata", "functions", "classes", "cli" and
            "endpoints" sections.
        """
        if "api_map" in self._cache:
            return self._cache["api_map"]

        api_map = {
            # NOTE(review): "package" and "version" name the upstream
            # gapless-crypto-data project and look stale for this
            # distribution -- confirm the intended values before changing
            # them, since consumers may key on these strings.
            "metadata": {
                "package": "gapless-crypto-data",
                "version": "2.15.0",
                "probe_version": "1.0.0",
                "type": "cryptocurrency-data-collection",
                "compatibility": "uv-native",
            },
            "functions": self._discover_functions(),
            "classes": self._discover_classes(),
            "cli": self._discover_cli(),
            "endpoints": self._discover_endpoints(),
        }

        self._cache["api_map"] = api_map
        return api_map

    def get_capabilities(self) -> Dict[str, Any]:
        """
        Build (once) and return the static capability matrix.

        Returns:
            Dict with "data_collection", "processing", "integration" and
            "ai_agent_features" sections.  All values are literals; nothing
            is measured or detected at call time.
        """
        if "capabilities" in self._cache:
            return self._cache["capabilities"]

        capabilities = {
            "data_collection": {
                "source": "binance-public-repository",
                "performance_multiplier": "22x",
                # NOTE(review): only spot is listed here -- confirm whether
                # futures markets should be advertised as well.
                "supported_markets": ["USDT-spot-pairs"],
                "timeframes": [
                    "1s",
                    "1m",
                    "3m",
                    "5m",
                    "15m",
                    "30m",
                    "1h",
                    "2h",
                    "4h",
                    "6h",
                    "8h",
                    "12h",
                    "1d",
                ],
                "data_format": "11-column-microstructure",
                "gap_guarantee": "zero-gaps",
            },
            "processing": {
                "memory_streaming": True,
                "atomic_operations": True,
                "resume_capability": True,
                "parallel_symbols": True,
            },
            "integration": {
                "pandas_compatible": True,
                "polars_native": True,
                "pyarrow_backend": True,
                "ccxt_compatible": True,
            },
            "ai_agent_features": {
                "stateless_probing": True,
                "task_graph_generation": True,
                "uv_cli_spawning": True,
                "deterministic_output": True,
                "no_file_operations": True,
            },
        }

        self._cache["capabilities"] = capabilities
        return capabilities

    def get_task_graph(self) -> Dict[str, Any]:
        """
        Build (once) and return the task dependency graph for phased spawning.

        Returns:
            Dict with:
              - "nodes": task id -> {"type", "command", "dependencies",
                "outputs", "phase"}
              - "execution_plan": phase name -> ordered list of task ids
              - "parallel_safe": phase name -> bool
        """
        if "task_graph" in self._cache:
            return self._cache["task_graph"]

        task_graph = {
            "nodes": {
                "discover": {
                    "type": "discovery",
                    "command": 'python -c "import gapless_crypto_clickhouse; print(gapless_crypto_clickhouse.__probe__.discover_api())"',
                    "dependencies": [],
                    "outputs": ["api_map"],
                    "phase": 0,
                },
                "capabilities": {
                    "type": "capability_check",
                    "command": 'python -c "import gapless_crypto_clickhouse; print(gapless_crypto_clickhouse.__probe__.get_capabilities())"',
                    "dependencies": [],
                    "outputs": ["capabilities_matrix"],
                    "phase": 0,
                },
                "validate_symbols": {
                    "type": "validation",
                    "command": 'python -c "import gapless_crypto_clickhouse; print(gapless_crypto_clickhouse.get_supported_symbols())"',
                    "dependencies": ["discover"],
                    "outputs": ["symbol_list"],
                    "phase": 1,
                },
                "validate_timeframes": {
                    "type": "validation",
                    "command": 'python -c "import gapless_crypto_clickhouse; print(gapless_crypto_clickhouse.get_supported_timeframes())"',
                    "dependencies": ["discover"],
                    "outputs": ["timeframe_list"],
                    "phase": 1,
                },
                "test_collection": {
                    "type": "integration_test",
                    "command": "python -c \"import gapless_crypto_clickhouse; df=gapless_crypto_clickhouse.fetch_data('BTCUSDT', '1h', limit=5); print(f'✓ {len(df)} rows collected')\"",
                    "dependencies": ["validate_symbols", "validate_timeframes"],
                    "outputs": ["collection_status"],
                    "phase": 2,
                },
            },
            "execution_plan": {
                "phase_0": ["discover", "capabilities"],
                "phase_1": ["validate_symbols", "validate_timeframes"],
                "phase_2": ["test_collection"],
            },
            "parallel_safe": {"phase_0": True, "phase_1": True, "phase_2": False},
        }

        self._cache["task_graph"] = task_graph
        return task_graph

    def generate_uv_cli_tasks(
        self, task_graph: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Turn a task graph into ``uv run --active ...`` command strings.

        Args:
            task_graph: Graph shaped like the result of ``get_task_graph()``;
                when omitted, ``get_task_graph()`` is used.

        Returns:
            Dict with the keys "phase_0_parallel", "phase_1_parallel" and
            "phase_2_sequential" (each a list of command strings, in the
            order given by the graph's execution plan) plus a "usage" hint.

        Raises:
            KeyError: if the graph lacks "execution_plan", one of the three
                phases, or a node referenced by a phase.
        """
        if task_graph is None:
            task_graph = self.get_task_graph()

        def commands_for(phase: str) -> List[str]:
            # Prefix every node command of one phase with the uv launcher.
            return [
                f"uv run --active {task_graph['nodes'][node_id]['command']}"
                for node_id in task_graph["execution_plan"][phase]
            ]

        return {
            "phase_0_parallel": commands_for("phase_0"),
            "phase_1_parallel": commands_for("phase_1"),
            "phase_2_sequential": commands_for("phase_2"),
            "usage": "Execute phases in order. Within each phase, commands can run in parallel.",
        }

    def get_probe_info(self) -> Dict[str, Any]:
        """
        Report probe metadata plus a small live health check.

        Returns:
            Dict with a static "probe_system" section and a "health" section
            holding the results of ``_check_imports()`` and
            ``_check_api_access()`` and the number of cached entries.
            This result is not cached.
        """
        return {
            "probe_system": {
                "version": "1.0.0",
                "compatible_agents": ["claude-code", "cursor", "copilot", "codeium"],
                "output_format": "deterministic-json",
                "caching": "memory-based",
                "stateless": True,
            },
            "health": {
                "imports_ok": self._check_imports(),
                "api_accessible": self._check_api_access(),
                "cache_status": len(self._cache),
            },
        }

    def _discover_functions(self) -> Dict[str, Any]:
        """Map each public plain function found on the ``api`` module to its
        parameter names, stripped docstring and defining module."""
        functions = {}

        for name in dir(api):
            if name.startswith("_"):
                continue
            obj = getattr(api, name)
            # isfunction() already implies callable; classes and builtins
            # are skipped.
            if not inspect.isfunction(obj):
                continue
            sig = inspect.signature(obj)
            functions[name] = {
                "parameters": [p.name for p in sig.parameters.values()],
                "docstring": (obj.__doc__ or "").strip(),
                "module": obj.__module__,
            }

        return functions

    def _discover_classes(self) -> Dict[str, Any]:
        """Describe the two advertised classes and their public callables."""
        return {
            "BinancePublicDataCollector": {
                "module": "gapless_crypto_clickhouse.collectors.binance_public_data_collector",
                "purpose": "high-performance-data-collection",
                "methods": self._get_public_methods(BinancePublicDataCollector),
            },
            "UniversalGapFiller": {
                "module": "gapless_crypto_clickhouse.gap_filling.universal_gap_filler",
                "purpose": "gap-detection-and-filling",
                "methods": self._get_public_methods(UniversalGapFiller),
            },
        }

    def _discover_cli(self) -> Dict[str, Any]:
        """Return the static CLI description embedded in the API map."""
        # NOTE(review): these entries name a `gapless-crypto-data` console
        # command; confirm that this distribution actually ships such an
        # entry point before agents are pointed at it.
        return {
            "entry_point": "gapless-crypto-data",
            "uv_usage": "uv run gapless-crypto-data",
            "common_patterns": [
                "uv run gapless-crypto-data --symbol BTCUSDT --timeframes 1h,4h",
                "uv run gapless-crypto-data --fill-gaps --directory ./data",
                "uv run gapless-crypto-data --symbol BTCUSDT,ETHUSDT --timeframes 1s,1d",
            ],
        }

    def _discover_endpoints(self) -> Dict[str, Any]:
        """Return a name -> one-line description map of the probe entry points."""
        return {
            "__probe__.discover_api": "Complete API surface discovery",
            "__probe__.get_capabilities": "Package capability matrix",
            "__probe__.get_task_graph": "Task dependency graph for phased execution",
            "__probe__.generate_uv_cli_tasks": "uv CLI commands for agent spawning",
            "__probe__.get_probe_info": "Probe system metadata and health",
        }

    def _get_public_methods(self, cls) -> List[str]:
        """List the names of callable attributes of ``cls`` that do not start
        with an underscore, in ``dir()`` order."""
        return [
            name for name in dir(cls) if not name.startswith("_") and callable(getattr(cls, name))
        ]

    def _check_imports(self) -> bool:
        """Return True when both ``pandas`` and ``httpx`` can be located.

        Only the import machinery is queried; neither package is imported.
        """
        try:
            import importlib.util

            return all(
                importlib.util.find_spec(module_name) is not None
                for module_name in ("pandas", "httpx")
            )
        except (ImportError, ValueError):
            # find_spec raises ValueError when an already-imported module has
            # ``__spec__`` set to None; a health check should report False
            # rather than propagate that.
            return False

    def _check_api_access(self) -> bool:
        """Return True when the ``api`` module imports and exposes ``fetch_data``."""
        try:
            from . import api

            return hasattr(api, "fetch_data")
        except Exception:
            # Deliberately broad: any failure importing the module means the
            # API is not accessible.
            return False
326
+
327
+
328
+ # Module-level singleton so callers can probe without constructing ProbeAPI themselves
329
+ _probe_instance = ProbeAPI()
330
+
331
+ # Bound methods of the singleton re-exported as module-level functions (they share its cache)
332
+ discover_api = _probe_instance.discover_api
333
+ get_capabilities = _probe_instance.get_capabilities
334
+ get_task_graph = _probe_instance.get_task_graph
335
+ generate_uv_cli_tasks = _probe_instance.generate_uv_cli_tasks
336
+ get_probe_info = _probe_instance.get_probe_info
337
+
338
+ # Public alias for the singleton itself; exported through __all__ below
339
+ probe = _probe_instance
340
+
341
+ __all__ = [
342
+ "discover_api",
343
+ "get_capabilities",
344
+ "get_task_graph",
345
+ "generate_uv_cli_tasks",
346
+ "get_probe_info",
347
+ "probe",
348
+ "ProbeAPI",
349
+ ]