chatsbom 0.2.8__tar.gz → 0.2.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {chatsbom-0.2.8 → chatsbom-0.2.9}/.gitignore +1 -7
  2. {chatsbom-0.2.8 → chatsbom-0.2.9}/PKG-INFO +2 -1
  3. {chatsbom-0.2.8 → chatsbom-0.2.9}/README.md +1 -0
  4. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/commands/collect.py +2 -0
  5. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/commands/convert.py +2 -2
  6. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/commands/download.py +3 -3
  7. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/commands/index.py +3 -3
  8. chatsbom-0.2.9/chatsbom/core/clickhouse.py +144 -0
  9. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/core/client.py +6 -1
  10. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/core/config.py +2 -2
  11. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/core/validation.py +1 -1
  12. {chatsbom-0.2.8/config → chatsbom-0.2.9/config/clickhouse}/users.d/guest.xml +1 -1
  13. {chatsbom-0.2.8 → chatsbom-0.2.9}/docker-compose.yaml +2 -2
  14. {chatsbom-0.2.8 → chatsbom-0.2.9}/pyproject.toml +1 -1
  15. {chatsbom-0.2.8 → chatsbom-0.2.9}/test.sh +5 -5
  16. {chatsbom-0.2.8 → chatsbom-0.2.9}/uv.lock +1 -1
  17. chatsbom-0.2.8/chatsbom/core/clickhouse.py +0 -98
  18. {chatsbom-0.2.8 → chatsbom-0.2.9}/.env.example +0 -0
  19. {chatsbom-0.2.8 → chatsbom-0.2.9}/.github/workflows/release.yaml +0 -0
  20. {chatsbom-0.2.8 → chatsbom-0.2.9}/.github/workflows/weekly.yml +0 -0
  21. {chatsbom-0.2.8 → chatsbom-0.2.9}/.pre-commit-config.yaml +0 -0
  22. {chatsbom-0.2.8 → chatsbom-0.2.9}/.python-version +0 -0
  23. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/__init__.py +0 -0
  24. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/__main__.py +0 -0
  25. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/__version__.py +0 -0
  26. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/commands/__init__.py +0 -0
  27. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/commands/chat.py +0 -0
  28. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/commands/query.py +0 -0
  29. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/commands/status.py +0 -0
  30. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/core/__init__.py +0 -0
  31. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/core/repository.py +0 -0
  32. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/core/schema.py +0 -0
  33. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/models/__init__.py +0 -0
  34. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/models/framework.py +0 -0
  35. {chatsbom-0.2.8 → chatsbom-0.2.9}/chatsbom/models/language.py +0 -0
  36. {chatsbom-0.2.8/config → chatsbom-0.2.9/config/clickhouse}/users.d/admin.xml +0 -0
  37. {chatsbom-0.2.8 → chatsbom-0.2.9}/figures/demo.gif +0 -0
  38. {chatsbom-0.2.8 → chatsbom-0.2.9}/figures/use-cases/gin/01.png +0 -0
  39. {chatsbom-0.2.8 → chatsbom-0.2.9}/figures/use-cases/gin/02.png +0 -0
  40. {chatsbom-0.2.8 → chatsbom-0.2.9}/tests/client_test.py +0 -0
  41. {chatsbom-0.2.8 → chatsbom-0.2.9}/tests/collect_test.py +0 -0
  42. {chatsbom-0.2.8 → chatsbom-0.2.9}/tests/convert_sbom_test.py +0 -0
  43. {chatsbom-0.2.8 → chatsbom-0.2.9}/tests/download_test.py +0 -0
  44. {chatsbom-0.2.8 → chatsbom-0.2.9}/tests/framework_test.py +0 -0
  45. {chatsbom-0.2.8 → chatsbom-0.2.9}/tests/index_test.py +0 -0
  46. {chatsbom-0.2.8 → chatsbom-0.2.9}/tests/language_test.py +0 -0
@@ -221,11 +221,5 @@ __marimo__/
221
221
  *.sqlite-shm
222
222
  *.sqlite-wal
223
223
 
224
- # Data folder
224
+ # Runtime data (sbom files, github repo lists, clickhouse storage)
225
225
  /data/
226
-
227
- # JSONL folder
228
- /*.jsonl
229
-
230
- # Database folder
231
- /database/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatsbom
3
- Version: 0.2.8
3
+ Version: 0.2.9
4
4
  Summary: ChatSBOM - Talk to your Supply Chain. Chat with SBOMs.
5
5
  Requires-Python: >=3.12
6
6
  Requires-Dist: claude-agent-sdk>=0.1.0
@@ -95,6 +95,7 @@ ChatSBOM follows a clean, modular architecture with high cohesion and low coupli
95
95
  collect → download → convert → index → status/query/chat
96
96
  ↓ ↓ ↓ ↓
97
97
  .jsonl files/ sbom.json database
98
+ (github/) (sbom/) (sbom/) (clickhouse/)
98
99
  ```
99
100
 
100
101
  ### Core Modules
@@ -73,6 +73,7 @@ ChatSBOM follows a clean, modular architecture with high cohesion and low coupli
73
73
  collect → download → convert → index → status/query/chat
74
74
  ↓ ↓ ↓ ↓
75
75
  .jsonl files/ sbom.json database
76
+ (github/) (sbom/) (sbom/) (clickhouse/)
76
77
  ```
77
78
 
78
79
  ### Core Modules
@@ -159,6 +159,8 @@ class Storage:
159
159
  self.filepath = filepath
160
160
  self.visited_ids: set[int] = set()
161
161
  self.min_stars_seen: float = float('inf')
162
+ # Ensure parent directories exist
163
+ os.makedirs(os.path.dirname(self.filepath), exist_ok=True)
162
164
  self._load_existing()
163
165
 
164
166
  def _load_existing(self):
@@ -134,10 +134,10 @@ def convert_project(project_dir: Path, output_format: str, overwrite: bool) -> C
134
134
 
135
135
  def main(
136
136
  input_dir: str = typer.Option(
137
- 'data', help='Root data directory',
137
+ 'data/sbom', help='Root data directory',
138
138
  ),
139
139
  concurrency: int = typer.Option(
140
- 4, help='Number of concurrent syft processes',
140
+ 16, help='Number of concurrent syft processes',
141
141
  ),
142
142
  output_format: str = typer.Option(
143
143
  'json', '--format', help='Syft output format (json, spdx-json, cyclonedx-json)',
@@ -163,10 +163,10 @@ def load_targets(jsonl_path: str) -> list[dict]:
163
163
 
164
164
  def main(
165
165
  input_file: str | None = typer.Option(
166
- None, help='Input JSONL file path (default: {language}.jsonl)',
166
+ None, help='Input JSONL file path (default: data/github/{language}.jsonl)',
167
167
  ),
168
168
  output_dir: str = typer.Option(
169
- 'data', help='Download destination directory',
169
+ 'data/sbom', help='Download destination directory',
170
170
  ),
171
171
  language: Language | None = typer.Option(
172
172
  None, help='Target Language (default: all)',
@@ -220,7 +220,7 @@ def main(
220
220
  if input_file:
221
221
  target_file = input_file
222
222
  else:
223
- target_file = f"{lang}.jsonl"
223
+ target_file = f"data/github/{lang}.jsonl"
224
224
 
225
225
  # Check if file exists, if not, skip efficiently
226
226
  if not os.path.exists(target_file):
@@ -119,8 +119,8 @@ def scan_artifacts(meta_context: dict[str, Any]) -> list[list[Any]]:
119
119
  repo = meta_context['repo']
120
120
  repo_id = meta_context['id']
121
121
 
122
- # Expected path: data/{language}/{owner}/{repo}/**/sbom.json
123
- base_dir = Path('data') / language / owner / repo
122
+ # Expected path: data/sbom/{language}/{owner}/{repo}/**/sbom.json
123
+ base_dir = Path('data/sbom') / language / owner / repo
124
124
  if not base_dir.exists():
125
125
  return []
126
126
 
@@ -302,7 +302,7 @@ def main(
302
302
  else:
303
303
  langs_to_process = language if language else list(Language)
304
304
  for lang in langs_to_process:
305
- f = Path(f"{lang.value}.jsonl")
305
+ f = Path(f"data/github/{lang.value}.jsonl")
306
306
  if f.exists():
307
307
  files_to_process.append(f)
308
308
  else:
@@ -0,0 +1,144 @@
1
+ """ClickHouse connection utilities."""
2
+ import socket
3
+
4
+ import clickhouse_connect
5
+ import typer
6
+ from rich.console import Console
7
+
8
+
9
+ def check_clickhouse_connection(
10
+ host: str,
11
+ port: int,
12
+ user: str,
13
+ password: str,
14
+ database: str = 'chatsbom',
15
+ console: Console | None = None,
16
+ require_database: bool = True,
17
+ ) -> bool:
18
+ """
19
+ Check ClickHouse connection with multi-step validation.
20
+
21
+ Steps:
22
+ 1. Network - is the server reachable?
23
+ 2. Authentication - are credentials valid?
24
+ 3. Database - does it exist and is it accessible?
25
+ 4. Tables - do required tables exist?
26
+ """
27
+ console = console or Console()
28
+
29
+ if not _check_network(host, port, console):
30
+ raise typer.Exit(1)
31
+
32
+ if not _check_auth(host, port, user, password, console):
33
+ raise typer.Exit(1)
34
+
35
+ if not require_database:
36
+ return True
37
+
38
+ if not _check_database(host, port, user, password, database, console):
39
+ raise typer.Exit(1)
40
+
41
+ if not _check_tables(host, port, user, password, database, console):
42
+ raise typer.Exit(1)
43
+
44
+ return True
45
+
46
+
47
+ def _check_network(host: str, port: int, console: Console) -> bool:
48
+ """Step 1: Check network connectivity."""
49
+ try:
50
+ with socket.create_connection((host, port), timeout=5):
51
+ return True
52
+ except TimeoutError:
53
+ console.print(
54
+ f'[bold red]Error:[/] Connection to [cyan]{host}:{port}[/] timed out.\n\n'
55
+ '[green]Solution:[/] [cyan]docker compose up -d[/]',
56
+ )
57
+ except OSError as e:
58
+ console.print(
59
+ f'[bold red]Error:[/] Cannot reach [cyan]{host}:{port}[/]\n'
60
+ f'[dim]{e}[/dim]\n\n'
61
+ '[green]Solution:[/] [cyan]docker compose up -d[/]',
62
+ )
63
+ return False
64
+
65
+
66
+ def _check_auth(host: str, port: int, user: str, password: str, console: Console) -> bool:
67
+ """Step 2: Check authentication."""
68
+ try:
69
+ client = clickhouse_connect.get_client(
70
+ host=host, port=port, username=user, password=password, database='default',
71
+ )
72
+ client.query('SELECT 1')
73
+ return True
74
+ except Exception as e:
75
+ err = str(e).lower()
76
+ if any(x in err for x in ['authentication', 'password', 'denied', 'incorrect']):
77
+ console.print(
78
+ f'[bold red]Error:[/] Authentication failed for [cyan]{user}[/]\n\n'
79
+ '[green]Solution:[/] Create user:\n'
80
+ f' [cyan]docker exec clickhouse clickhouse-client -q \\\n'
81
+ f' "CREATE USER IF NOT EXISTS {user} IDENTIFIED BY \'<password>\'"[/]',
82
+ )
83
+ else:
84
+ console.print(f'[bold red]Error:[/] Auth failed: [dim]{e}[/dim]')
85
+ return False
86
+
87
+
88
+ def _check_database(
89
+ host: str, port: int, user: str, password: str, database: str, console: Console,
90
+ ) -> bool:
91
+ """Step 3: Check database access."""
92
+ try:
93
+ client = clickhouse_connect.get_client(
94
+ host=host, port=port, username=user, password=password, database=database,
95
+ )
96
+ client.query('SELECT 1')
97
+ return True
98
+ except Exception as e:
99
+ err = str(e).lower()
100
+ if 'unknown database' in err:
101
+ console.print(
102
+ f'[bold red]Error:[/] Database [cyan]{database}[/] does not exist.\n\n'
103
+ '[green]Solution:[/] [cyan]chatsbom index --language go[/]',
104
+ )
105
+ elif any(x in err for x in ['access', 'denied', 'grant', 'not allowed']):
106
+ console.print(
107
+ f'[bold red]Error:[/] User [cyan]{user}[/] cannot access [cyan]{database}[/]\n\n'
108
+ '[green]Solution:[/] Grant access:\n'
109
+ f' [cyan]docker exec clickhouse clickhouse-client -q \\\n'
110
+ f' "GRANT SELECT ON {database}.* TO {user}"[/]\n\n'
111
+ '[dim]Or update config/clickhouse/users.d/guest.xml[/dim]',
112
+ )
113
+ else:
114
+ console.print(
115
+ f'[bold red]Error:[/] Cannot access [cyan]{database}[/]: [dim]{e}[/dim]',
116
+ )
117
+ return False
118
+
119
+
120
+ def _check_tables(
121
+ host: str, port: int, user: str, password: str, database: str, console: Console,
122
+ ) -> bool:
123
+ """Step 4: Check required tables exist."""
124
+ required = {'repositories', 'artifacts'}
125
+
126
+ try:
127
+ client = clickhouse_connect.get_client(
128
+ host=host, port=port, username=user, password=password, database=database,
129
+ )
130
+ result = client.query('SHOW TABLES')
131
+ existing = {row[0] for row in result.result_rows}
132
+
133
+ if missing := required - existing:
134
+ console.print(
135
+ f'[bold red]Error:[/] Missing tables: [cyan]{", ".join(sorted(missing))}[/]\n\n'
136
+ '[green]Solution:[/] [cyan]chatsbom index --language go[/]',
137
+ )
138
+ return False
139
+ return True
140
+ except Exception as e:
141
+ console.print(
142
+ f'[bold red]Error:[/] Cannot check tables: [dim]{e}[/dim]',
143
+ )
144
+ return False
@@ -1,4 +1,5 @@
1
1
  from datetime import timedelta
2
+ from pathlib import Path
2
3
 
3
4
  import requests
4
5
  import requests_cache
@@ -10,7 +11,7 @@ logger = structlog.get_logger('client')
10
11
 
11
12
 
12
13
  def get_http_client(
13
- cache_name: str = 'http_cache',
14
+ cache_name: str = 'data/http/cache.sqlite3',
14
15
  expire_after: int = 86400, # 24 hours
15
16
  retries: int = 3,
16
17
  pool_size: int = 50,
@@ -19,6 +20,10 @@ def get_http_client(
19
20
  Returns a requests session with caching and retry logic.
20
21
  """
21
22
 
23
+ # Ensure the data directory exists
24
+ cache_path = Path(cache_name)
25
+ cache_path.parent.mkdir(parents=True, exist_ok=True)
26
+
22
27
  # Configure Caching
23
28
  # We want to cache 200 OK and 404 Not Found (negative caching)
24
29
  session = requests_cache.CachedSession(
@@ -11,12 +11,12 @@ class PathConfig:
11
11
  """File path configuration."""
12
12
 
13
13
  # Base directories
14
- data_dir: Path = field(default_factory=lambda: Path('data'))
14
+ data_dir: Path = field(default_factory=lambda: Path('data/sbom'))
15
15
  output_dir: Path = field(default_factory=lambda: Path('.'))
16
16
 
17
17
  # File naming conventions
18
18
  sbom_filename: str = 'sbom.json'
19
- repo_list_pattern: str = '{language}.jsonl'
19
+ repo_list_pattern: str = 'data/github/{language}.jsonl'
20
20
 
21
21
  def get_repo_list_path(self, language: str) -> Path:
22
22
  """Get the path for repository list file."""
@@ -64,7 +64,7 @@ def validate_download_structure(data_dir: Path, language: str) -> bool:
64
64
  Validate download directory structure.
65
65
 
66
66
  Expected structure:
67
- data/{language}/{owner}/{repo}/{branch}/[files]
67
+ data/sbom/{language}/{owner}/{repo}/{branch}/[files]
68
68
 
69
69
  Returns:
70
70
  True if valid
@@ -4,7 +4,7 @@
4
4
  <password>guest</password>
5
5
  <profile>readonly</profile>
6
6
  <allow_databases>
7
- <database>sbom</database>
7
+ <database>chatsbom</database>
8
8
  </allow_databases>
9
9
  </guest>
10
10
  </users>
@@ -5,8 +5,8 @@ services:
5
5
  - "8123:8123"
6
6
  - "9000:9000"
7
7
  volumes:
8
- - ./database:/var/lib/clickhouse
9
- - ./config/users.d:/etc/clickhouse-server/users.d
8
+ - ./data/clickhouse:/var/lib/clickhouse
9
+ - ./config/clickhouse/users.d:/etc/clickhouse-server/users.d
10
10
  environment:
11
11
  CLICKHOUSE_DB: sbom
12
12
  ulimits:
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "chatsbom"
7
- version = "0.2.8"
7
+ version = "0.2.9"
8
8
  description = "ChatSBOM - Talk to your Supply Chain. Chat with SBOMs."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.12"
@@ -1,20 +1,20 @@
1
1
  # 1. Collect repository links from GitHub (e.g., top Go repos)
2
- uv run python -m chatsbom collect --language go --min-stars 10000
2
+ uv run python -m chatsbom collect --min-stars 1000
3
3
 
4
4
  # 2. Download dependency files
5
- uv run python -m chatsbom download --language go
5
+ uv run python -m chatsbom download
6
6
 
7
7
  # 3. Convert to standard SBOM format
8
- uv run python -m chatsbom convert --language go
8
+ uv run python -m chatsbom convert
9
9
 
10
10
  # 4. Index SBOM data into database
11
- uv run python -m chatsbom index --language go
11
+ uv run python -m chatsbom index
12
12
 
13
13
  # 5. Show database statistics
14
14
  uv run python -m chatsbom status
15
15
 
16
16
  # 6. Query dependencies
17
- uv run python -m chatsbom query gin --language go
17
+ uv run python -m chatsbom query gin
18
18
 
19
19
  # 7. Launch AI chat interface
20
20
  uv run python -m chatsbom chat
@@ -179,7 +179,7 @@ wheels = [
179
179
 
180
180
  [[package]]
181
181
  name = "chatsbom"
182
- version = "0.2.7"
182
+ version = "0.2.8"
183
183
  source = { editable = "." }
184
184
  dependencies = [
185
185
  { name = "claude-agent-sdk" },
@@ -1,98 +0,0 @@
1
- """ClickHouse connection utilities."""
2
- import clickhouse_connect
3
- import typer
4
- from rich.console import Console
5
-
6
-
7
- def check_clickhouse_connection(
8
- host: str,
9
- port: int,
10
- user: str,
11
- password: str,
12
- database: str = 'chatsbom',
13
- console: Console | None = None,
14
- require_database: bool = True,
15
- ) -> bool:
16
- """
17
- Check ClickHouse connection and optionally verify database existence.
18
-
19
- Args:
20
- host: ClickHouse host
21
- port: ClickHouse HTTP port
22
- user: ClickHouse username
23
- password: ClickHouse password
24
- database: Database to check
25
- console: Rich console for output (creates one if None)
26
- require_database: If True, check that the database exists
27
-
28
- Returns:
29
- True if connection (and database) check passed, False otherwise
30
-
31
- Raises:
32
- typer.Exit: If connection fails
33
- """
34
- if console is None:
35
- console = Console()
36
-
37
- # Step 1: Test basic connectivity
38
- try:
39
- client = clickhouse_connect.get_client(
40
- host=host,
41
- port=port,
42
- username=user,
43
- password=password,
44
- database='default',
45
- )
46
- client.query('SELECT 1')
47
- except Exception as e:
48
- console.print(
49
- f'[bold red]Error:[/] Failed to connect to ClickHouse at '
50
- f'[cyan]{host}:{port}[/]\n\n'
51
- f'Details: {e}\n\n'
52
- 'Please ensure ClickHouse is running:\n\n'
53
- '[bold]Option 1:[/] Use docker compose\n'
54
- ' [cyan]docker compose up -d[/]\n\n'
55
- '[bold]Option 2:[/] Use docker run\n'
56
- ' Step 1: Start ClickHouse\n'
57
- ' [cyan]docker run --rm -d --name clickhouse \\\n'
58
- ' -p 8123:8123 -p 9000:9000 \\\n'
59
- ' -e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 \\\n'
60
- ' -v ./database:/var/lib/clickhouse \\\n'
61
- ' clickhouse/clickhouse-server[/]\n\n'
62
- ' Step 2: Create admin and guest users\n'
63
- ' [cyan]docker exec -it clickhouse clickhouse-client -q \\\n'
64
- " \"CREATE USER IF NOT EXISTS admin IDENTIFIED BY 'admin'\"[/]\n"
65
- ' [cyan]docker exec -it clickhouse clickhouse-client -q \\\n'
66
- " \"CREATE USER IF NOT EXISTS guest IDENTIFIED BY 'guest'\"[/]\n"
67
- ' [cyan]docker exec -it clickhouse clickhouse-client -q \\\n'
68
- " \"GRANT CURRENT GRANTS ON *.* TO admin WITH GRANT OPTION\"[/]\n"
69
- ' [cyan]docker exec -it clickhouse clickhouse-client -q \\\n'
70
- " \"GRANT SELECT ON *.* TO guest\"[/]\n",
71
- )
72
- raise typer.Exit(1)
73
-
74
- # Step 2: Check database exists (if required)
75
- if require_database:
76
- try:
77
- result = client.query(
78
- 'SELECT name FROM system.databases WHERE name = {db:String}',
79
- parameters={'db': database},
80
- )
81
- if not result.result_rows:
82
- console.print(
83
- f'[bold red]Error:[/] Database [cyan]{database}[/] '
84
- 'does not exist.\n\n'
85
- 'Please run the import command first to create and '
86
- 'populate the database:\n\n'
87
- ' [cyan]chatsbom index[/]',
88
- )
89
- raise typer.Exit(1)
90
- except typer.Exit:
91
- raise
92
- except Exception as e:
93
- console.print(
94
- f'[bold red]Error:[/] Failed to check database: {e}',
95
- )
96
- raise typer.Exit(1)
97
-
98
- return True
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes