grafit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- grafit-0.1.0/PKG-INFO +20 -0
- grafit-0.1.0/README.md +3 -0
- grafit-0.1.0/pyproject.toml +43 -0
- grafit-0.1.0/setup.cfg +4 -0
- grafit-0.1.0/src/grafit/__init__.py +3 -0
- grafit-0.1.0/src/grafit/cli.py +258 -0
- grafit-0.1.0/src/grafit/database.py +297 -0
- grafit-0.1.0/src/grafit/fit_analyzer.py +169 -0
- grafit-0.1.0/src/grafit/fit_importer.py +354 -0
- grafit-0.1.0/src/grafit/schema_generator.py +211 -0
- grafit-0.1.0/src/grafit.egg-info/PKG-INFO +20 -0
- grafit-0.1.0/src/grafit.egg-info/SOURCES.txt +14 -0
- grafit-0.1.0/src/grafit.egg-info/dependency_links.txt +1 -0
- grafit-0.1.0/src/grafit.egg-info/entry_points.txt +2 -0
- grafit-0.1.0/src/grafit.egg-info/requires.txt +6 -0
- grafit-0.1.0/src/grafit.egg-info/top_level.txt +1 -0
grafit-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: grafit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Import Garmin FIT files into SQLite database and visualize with Grafana
|
|
5
|
+
Author-email: nbr23 <max@23.tf>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/nbr23/grafit
|
|
8
|
+
Project-URL: Repository, https://github.com/nbr23/grafit
|
|
9
|
+
Project-URL: Issues, https://github.com/nbr23/grafit/issues
|
|
10
|
+
Requires-Python: >=3.11
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
Requires-Dist: python-fitparse>=2.0.4
|
|
13
|
+
Provides-Extra: dev
|
|
14
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
15
|
+
Requires-Dist: black>=23.0; extra == "dev"
|
|
16
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
17
|
+
|
|
18
|
+
# GraFIT
|
|
19
|
+
|
|
20
|
+
Visualize your FIT files in Grafana
|
grafit-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "grafit"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Import Garmin FIT files into SQLite database and visualize with Grafana"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
authors = [
|
|
11
|
+
{name = "nbr23", email = "max@23.tf"}
|
|
12
|
+
]
|
|
13
|
+
license = "MIT"
|
|
14
|
+
requires-python = ">=3.11"
|
|
15
|
+
dependencies = [
|
|
16
|
+
"python-fitparse>=2.0.4",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
[project.optional-dependencies]
|
|
20
|
+
dev = [
|
|
21
|
+
"pytest>=7.0",
|
|
22
|
+
"black>=23.0",
|
|
23
|
+
"ruff>=0.1.0",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.scripts]
|
|
27
|
+
grafit = "grafit.cli:main"
|
|
28
|
+
|
|
29
|
+
[project.urls]
|
|
30
|
+
Homepage = "https://github.com/nbr23/grafit"
|
|
31
|
+
Repository = "https://github.com/nbr23/grafit"
|
|
32
|
+
Issues = "https://github.com/nbr23/grafit/issues"
|
|
33
|
+
|
|
34
|
+
[tool.setuptools.packages.find]
|
|
35
|
+
where = ["src"]
|
|
36
|
+
|
|
37
|
+
[tool.black]
|
|
38
|
+
line-length = 100
|
|
39
|
+
target-version = ["py38"]
|
|
40
|
+
|
|
41
|
+
[tool.ruff]
|
|
42
|
+
line-length = 100
|
|
43
|
+
target-version = "py38"
|
grafit-0.1.0/src/grafit/cli.py
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
"""Command-line interface for GraFIT."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import sys
|
|
5
|
+
import logging
|
|
6
|
+
import json
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from .schema_generator import generate_schema
|
|
10
|
+
from .database import GraFITDatabase
|
|
11
|
+
from .fit_importer import FitImporter
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def cmd_generate_schema(args):
    """Write the SQLite schema generated from a profile file to an SQL file.

    Reads ``args.profile`` (input path) and ``args.output`` (destination path).
    Exits with status 1 when the profile file is missing or when generating or
    writing the schema raises.
    """
    source = Path(args.profile)
    target = Path(args.output)

    if not source.exists():
        print(f"Error: Profile file {source} not found")
        sys.exit(1)

    try:
        sql_text = generate_schema(str(source))

        # The destination folder may not exist yet; create it before writing.
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(sql_text)

        print(f"Schema generated successfully: {target}")
    except Exception as exc:
        print(f"Error generating schema: {exc}")
        sys.exit(1)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def cmd_init_database(args):
    """Initialize database with schema.

    Opens (creating if needed) the database at ``args.database``, applies the
    SQL file at ``args.schema`` and prints the resulting schema version.
    Exits with status 1 when the schema file is missing or initialization fails.
    """
    db_path = Path(args.database)
    schema_path = Path(args.schema)

    if not schema_path.exists():
        print(f"Error: Schema file {schema_path} not found")
        sys.exit(1)

    try:
        db = GraFITDatabase(str(db_path))
        try:
            db.initialize_schema(str(schema_path))
            version = db.get_schema_version()
        finally:
            # Release the connection even when schema execution fails.
            db.close()

        print(f"Database initialized successfully: {db_path}")
        print(f"Schema version: {version}")

    except Exception as e:
        print(f"Error initializing database: {e}")
        sys.exit(1)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _open_import_database(db_path):
    """Open the database at *db_path*, creating and initializing it if it is missing."""
    if db_path.exists():
        return GraFITDatabase(str(db_path))

    print(f"Database {db_path} not found, creating with schema...")

    # Generate schema on the fly from the profile located relative to this module
    profile_path = Path(__file__).parent.parent.parent / "extra" / "profile.py"
    if not profile_path.exists():
        print(f"Error: Profile file {profile_path} not found")
        sys.exit(1)

    schema = generate_schema(str(profile_path))

    db = GraFITDatabase(str(db_path))
    try:
        db.execute_schema(schema)
    except Exception:
        # Do not leave the connection open when the schema cannot be applied.
        db.close()
        raise
    print(f"✓ Database created and initialized: {db_path}")
    return db


def _report_file_import(input_path, result):
    """Print the outcome of a single-file import; return False when the import failed."""
    status = result["status"]
    if status == "success":
        print(f"✓ Imported {input_path.name}")
        print(f" Messages: {result['messages_imported']}")
        print(f" Tables: {', '.join(result['tables_affected'])}")
        return True
    if status == "skipped":
        print(f"⏭ Skipped {input_path.name} (already imported)")
        return True
    print(f"✗ Failed to import {input_path.name}: {result.get('error', 'Unknown error')}")
    return False


def _report_directory_import(result):
    """Print the summary counters, failures and touched tables of a directory import."""
    print("Import complete:")
    print(f" Files processed: {result['files_processed']}")
    print(f" Files imported: {result['files_imported']}")
    print(f" Files skipped: {result['files_skipped']}")
    print(f" Files failed: {result['files_failed']}")
    print(f" Total messages: {result['total_messages']}")

    if result["failed_files"]:
        # A real newline separates the sections (previously a literal "\n" was printed).
        print("\nFailed files:")
        for failure in result["failed_files"]:
            print(f" ✗ {failure['file']}: {failure['error']}")

    if result["tables_affected"]:
        print(f"\nTables updated: {', '.join(sorted(result['tables_affected']))}")


def cmd_import(args):
    """Import FIT files - automatically detects if path is file or directory.

    Uses ``args.path``, ``args.database``, ``args.force``, ``args.pattern``,
    ``args.recursive`` and ``args.verbose``. A missing database is created and
    initialized first. Exits with status 1 when the path does not exist, is
    neither a file nor a directory, a single-file import fails, or any
    exception is raised. The database connection is always closed.
    """
    input_path = Path(args.path)
    db_path = Path(args.database)

    if not input_path.exists():
        print(f"Error: Path {input_path} not found")
        sys.exit(1)

    setup_logging(args.verbose)

    try:
        db = _open_import_database(db_path)
        try:
            importer = FitImporter(db)

            if input_path.is_file():
                print(f"Importing file: {input_path}")
                result = importer.import_fit_file(str(input_path), args.force)
                if not _report_file_import(input_path, result):
                    sys.exit(1)

            elif input_path.is_dir():
                print(f"Importing directory: {input_path}")
                result = importer.import_directory(
                    str(input_path), args.pattern, args.force, args.recursive
                )
                _report_directory_import(result)

            else:
                print(f"Error: {input_path} is neither a file nor a directory")
                sys.exit(1)
        finally:
            # Runs on success, on exceptions and on sys.exit() alike.
            db.close()

    except Exception as e:
        print(f"Error importing: {e}")
        sys.exit(1)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def cmd_database_stats(args):
    """Show database statistics.

    Prints the schema version, the number of tables and the total record count
    of the database at ``args.database``; with ``args.detailed`` also prints the
    non-empty tables ordered by descending record count. Exits with status 1
    when the database file is missing or reading it fails.
    """
    db_path = Path(args.database)

    if not db_path.exists():
        print(f"Error: Database {db_path} not found")
        sys.exit(1)

    try:
        db = GraFITDatabase(str(db_path))
        try:
            version = db.get_schema_version()
            stats = db.get_import_stats()
        finally:
            # Close the connection even when a query raises.
            db.close()

        print(f"Database: {db_path}")
        print(f"Schema version: {version}")
        print(f"Tables: {len(stats)}")

        total_records = sum(stats.values())
        print(f"Total records: {total_records:,}")

        if args.detailed:
            # A real newline separates the sections (previously a literal "\n" was printed).
            print("\nTable breakdown:")
            for table, count in sorted(stats.items(), key=lambda x: x[1], reverse=True):
                if count > 0:
                    print(f" {table}: {count:,}")

    except Exception as e:
        print(f"Error reading database stats: {e}")
        sys.exit(1)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def setup_logging(verbose: bool):
    """Configure logging via ``logging.basicConfig``: DEBUG if *verbose*, else INFO."""
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    if verbose:
        chosen_level = logging.DEBUG
    else:
        chosen_level = logging.INFO
    logging.basicConfig(format=log_format, level=chosen_level)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def main():
    """Main CLI entry point.

    Builds the ``grafit`` parser with the subcommands ``generate-schema``,
    ``init-database``, ``import`` and ``stats``, parses the process arguments
    and calls the handler bound to the selected subcommand. Prints the help
    text and exits with status 1 when no subcommand is given.
    """
    cli = argparse.ArgumentParser(
        prog="grafit", description="Import Garmin FIT files into SQLite database"
    )
    commands = cli.add_subparsers(dest="command", help="Available commands")

    # generate-schema <profile> <output>
    generate_cmd = commands.add_parser(
        "generate-schema", help="Generate SQLite schema from FIT profile"
    )
    generate_cmd.add_argument("profile", help="Path to profile.py file")
    generate_cmd.add_argument("output", help="Output SQL file path")
    generate_cmd.set_defaults(func=cmd_generate_schema)

    # init-database <database> <schema>
    init_cmd = commands.add_parser("init-database", help="Initialize database with schema")
    init_cmd.add_argument("database", help="Path to SQLite database file")
    init_cmd.add_argument("schema", help="Path to schema SQL file")
    init_cmd.set_defaults(func=cmd_init_database)

    # import <path> [database] [options] - one command for files and directories
    import_cmd = commands.add_parser(
        "import", help="Import FIT file(s) - automatically detects file or directory"
    )
    import_cmd.add_argument("path", help="Path to FIT file or directory containing FIT files")
    import_cmd.add_argument(
        "database",
        nargs="?",
        default="fit_data.db",
        help="Path to SQLite database file (default: fit_data.db)",
    )
    import_options = (
        (
            ("--pattern",),
            {
                "default": "*.fit",
                "help": "File pattern to match for directories (default: *.fit)",
            },
        ),
        (
            ("--force",),
            {"action": "store_true", "help": "Force reimport even if files already exist"},
        ),
        (
            ("--no-recursive",),
            {
                "dest": "recursive",
                "action": "store_false",
                "help": "Don't search subdirectories when importing directories",
            },
        ),
        (
            ("--verbose", "-v"),
            {"action": "store_true", "help": "Enable verbose logging"},
        ),
    )
    for flags, options in import_options:
        import_cmd.add_argument(*flags, **options)
    import_cmd.set_defaults(func=cmd_import)

    # stats <database> [--detailed]
    stats_cmd = commands.add_parser("stats", help="Show database statistics")
    stats_cmd.add_argument("database", help="Path to SQLite database file")
    stats_cmd.add_argument(
        "--detailed", action="store_true", help="Show detailed table breakdown"
    )
    stats_cmd.set_defaults(func=cmd_database_stats)

    parsed = cli.parse_args()

    if parsed.command:
        # Each subparser stored its handler under ``func``.
        parsed.func(parsed)
        return

    cli.print_help()
    sys.exit(1)


if __name__ == "__main__":
    main()
|
|
grafit-0.1.0/src/grafit/database.py
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
"""Database connection and initialization for GraFIT."""
|
|
2
|
+
|
|
3
|
+
import sqlite3
|
|
4
|
+
import hashlib
|
|
5
|
+
import logging
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional, Dict, Any, List
|
|
8
|
+
from contextlib import contextmanager
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class GraFITDatabase:
    """Manages SQLite database connections and operations for GraFIT.

    The connection is opened with ``isolation_level=None`` (autocommit); use
    :meth:`transaction` to group statements atomically. Instances also work as
    context managers, closing the connection when the ``with`` block exits.
    """

    def __init__(self, db_path: str):
        """Open (creating parent directories if needed) the database at *db_path*."""
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)

        # Configure SQLite connection
        self.connection = sqlite3.connect(
            str(self.db_path), isolation_level=None  # Autocommit mode
        )
        self.connection.row_factory = sqlite3.Row  # Access columns by name

        # Enable WAL mode and other optimizations
        self._configure_database()

    def __enter__(self) -> "GraFITDatabase":
        """Return the instance so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the connection; exceptions from the ``with`` body propagate."""
        self.close()

    @staticmethod
    def _quote_identifier(name: str) -> str:
        """Return *name* as a double-quoted SQL identifier.

        Quoting every identifier (and doubling embedded quotes) keeps table and
        column names that collide with SQL keywords from breaking statements.
        """
        return '"' + name.replace('"', '""') + '"'

    @contextmanager
    def _cursor(self):
        """Yield a fresh cursor and always close it afterwards."""
        cursor = self.connection.cursor()
        try:
            yield cursor
        finally:
            cursor.close()

    def _configure_database(self):
        """Configure SQLite database settings."""
        pragmas = (
            "PRAGMA foreign_keys = ON",
            "PRAGMA journal_mode = WAL",
            "PRAGMA synchronous = NORMAL",
            "PRAGMA cache_size = -64000",  # negative value = size in KiB (~64MB cache)
            "PRAGMA temp_store = MEMORY",
        )
        with self._cursor() as cursor:
            for pragma in pragmas:
                cursor.execute(pragma)

    def initialize_schema(self, schema_file: str):
        """Initialize database schema from SQL file.

        Raises:
            FileNotFoundError: if *schema_file* does not exist.
            sqlite3.Error: if executing the schema fails.
        """
        schema_path = Path(schema_file)
        if not schema_path.exists():
            raise FileNotFoundError(f"Schema file not found: {schema_file}")

        with open(schema_path, "r") as f:
            schema_sql = f.read()

        self.execute_schema(schema_sql)
        logger.info("Database schema initialized from %s", schema_file)

    def execute_schema(self, schema_sql: str):
        """Execute schema SQL directly.

        Raises:
            sqlite3.Error: if the script fails; the error is logged and re-raised.
        """
        with self._cursor() as cursor:
            try:
                cursor.executescript(schema_sql)
                logger.info("Database schema executed successfully")
            except sqlite3.Error as e:
                logger.error("Failed to execute schema: %s", e)
                raise

    def is_file_imported(self, file_hash: str) -> bool:
        """Check if a file has already been imported.

        Looks for *file_hash* in the ``file_id`` table. Any SQLite error (for
        example the table not existing yet) is treated as "not imported".
        """
        with self._cursor() as cursor:
            try:
                cursor.execute(
                    """
                    SELECT 1 FROM file_id WHERE _file_hash = ? LIMIT 1
                    """,
                    (file_hash,),
                )
                return cursor.fetchone() is not None
            except sqlite3.Error as e:
                # If file_id table doesn't exist, file hasn't been imported
                logger.debug("Could not check import state for %s: %s", file_hash, e)
                return False

    def insert_message_data(
        self, table_name: str, data: Dict[str, Any], file_hash: str, file_id: str
    ):
        """Insert message data into the specified table.

        ``_file_hash`` and ``_file_id`` are added to a copy of *data*; keys that
        are not columns of *table_name* are dropped. Nothing is inserted when
        *data* is empty, the table has no columns, or no key matches a column.

        Raises:
            sqlite3.Error: if the INSERT fails; the error is logged and re-raised.
        """
        if not data:
            return

        record = {**data, "_file_hash": file_hash, "_file_id": file_id}

        # Get valid columns for this table to filter out unknown fields
        valid_columns = self._get_table_columns(table_name)
        if not valid_columns:
            logger.warning("Could not get column info for table %s", table_name)
            return

        filtered_data = {}
        for key, value in record.items():
            if key in valid_columns:
                filtered_data[key] = value
            else:
                logger.debug("Skipping unknown column %s for table %s", key, table_name)

        if not filtered_data:
            logger.debug("No valid data to insert into %s", table_name)
            return

        columns = list(filtered_data)
        values = [filtered_data[col] for col in columns]
        sql = self._build_insert_sql(table_name, columns)

        with self._cursor() as cursor:
            try:
                cursor.execute(sql, values)
                logger.debug("Inserted data into %s: %d fields", table_name, len(values))
            except sqlite3.Error as e:
                logger.error("Failed to insert into %s: %s", table_name, e)
                logger.debug("Filtered data: %s", filtered_data)
                raise

    def _build_insert_sql(self, table_name: str, columns: List[str]) -> str:
        """Build a parameterized INSERT for *columns* with all identifiers quoted."""
        column_sql = ", ".join(self._quote_identifier(col) for col in columns)
        placeholder_sql = ", ".join("?" for _ in columns)
        table_ref = self._quote_identifier(table_name)
        return f"INSERT INTO {table_ref} ({column_sql}) VALUES ({placeholder_sql})"

    def _get_table_columns(self, table_name: str) -> set:
        """Get set of column names for a table (empty set on SQLite errors)."""
        with self._cursor() as cursor:
            try:
                cursor.execute(f"PRAGMA table_info({self._quote_identifier(table_name)})")
                return {row[1] for row in cursor.fetchall()}  # row[1] is column name
            except sqlite3.Error as e:
                logger.error("Failed to get column info for %s: %s", table_name, e)
                return set()

    def bulk_insert_message_data(
        self, table_name: str, data_list: List[Dict[str, Any]], file_hash: str, file_id: str
    ):
        """Bulk insert multiple records for better performance.

        Each record gets ``_file_hash`` and ``_file_id`` added and is filtered to
        the table's columns. The INSERT uses the union of all remaining keys;
        keys missing from a record are inserted as NULL.

        Raises:
            sqlite3.Error: if the INSERT fails; the error is logged and re-raised.
        """
        if not data_list:
            return

        # Get valid columns for this table
        valid_columns = self._get_table_columns(table_name)
        if not valid_columns:
            logger.warning("Could not get column info for table %s", table_name)
            return

        # Filter and prepare all records
        filtered_data_list = []
        for data in data_list:
            record = {**data, "_file_hash": file_hash, "_file_id": file_id}
            filtered_data = {k: v for k, v in record.items() if k in valid_columns}
            if filtered_data:
                filtered_data_list.append(filtered_data)

        if not filtered_data_list:
            logger.debug("No valid data to bulk insert into %s", table_name)
            return

        # Union of columns across all records, sorted for consistent ordering
        columns = sorted({key for record in filtered_data_list for key in record})
        sql = self._build_insert_sql(table_name, columns)

        # dict.get() supplies None (NULL) for columns a record does not have
        values_list = [[record.get(col) for col in columns] for record in filtered_data_list]

        with self._cursor() as cursor:
            try:
                cursor.executemany(sql, values_list)
                logger.debug(
                    "Bulk inserted %d records into %s", len(filtered_data_list), table_name
                )
            except sqlite3.Error as e:
                logger.error("Failed to bulk insert into %s: %s", table_name, e)
                raise

    @contextmanager
    def transaction(self):
        """Context manager for database transactions.

        Issues BEGIN, yields a cursor, then COMMITs. On any exception the
        transaction is rolled back (when one is still active) and the original
        exception is re-raised.
        """
        with self._cursor() as cursor:
            try:
                cursor.execute("BEGIN")
                yield cursor
                cursor.execute("COMMIT")
            except Exception:
                # Only roll back an active transaction; otherwise the ROLLBACK
                # itself would fail and hide the original error.
                if self.connection.in_transaction:
                    cursor.execute("ROLLBACK")
                raise

    def get_schema_version(self) -> Optional[str]:
        """Get current schema version from metadata table.

        Returns None when the ``schema_version`` key is absent or the query fails.
        """
        with self._cursor() as cursor:
            try:
                cursor.execute(
                    """
                    SELECT value FROM _grafit_metadata
                    WHERE key = 'schema_version'
                    """
                )
                result = cursor.fetchone()
                return result[0] if result else None
            except sqlite3.Error:
                return None

    def get_table_names(self) -> List[str]:
        """Get list of all table names in the database, sorted by name.

        SQLite-internal tables and ``_grafit_metadata`` are excluded.
        """
        with self._cursor() as cursor:
            cursor.execute(
                """
                SELECT name FROM sqlite_master
                WHERE type='table' AND name NOT LIKE 'sqlite_%'
                AND name != '_grafit_metadata'  -- Exclude only our metadata table
                ORDER BY name
                """
            )
            return [row[0] for row in cursor.fetchall()]

    def get_import_stats(self) -> Dict[str, int]:
        """Get statistics about imported data as a ``{table name: row count}`` mapping."""
        stats = {}
        with self._cursor() as cursor:
            for table in self.get_table_names():
                cursor.execute(f"SELECT COUNT(*) FROM {self._quote_identifier(table)}")
                stats[table] = cursor.fetchone()[0]
        return stats

    def close(self):
        """Close database connection."""
        if self.connection:
            self.connection.close()
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def calculate_file_hash(file_path: str) -> str:
    """Return the hex SHA-256 digest of the file at *file_path* (used for deduplication)."""
    digest = hashlib.sha256()

    with open(file_path, "rb") as stream:
        # Feed the hash in 4096-byte pieces so large files are never fully loaded.
        while piece := stream.read(4096):
            digest.update(piece)

    return digest.hexdigest()
|