graphora 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
graphora/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ """
2
+ Graphora Client Library
3
+
4
+ A Python client for interacting with the Graphora API.
5
+ """
6
+
7
+ from importlib.metadata import version, PackageNotFoundError
8
+
9
+ from graphora.client import GraphoraClient
10
+ from graphora.models import * # noqa: F401,F403 - re-export public models
11
+
# Start from the development placeholder and overwrite it only when the
# "graphora" distribution metadata can be found.
__version__ = "0.0.0-dev"
try:
    __version__ = version("graphora")
except PackageNotFoundError:
    pass
@@ -0,0 +1,8 @@
1
+ """Graphora CLI - Command-line interface for knowledge graph extraction."""
2
+
3
+ from importlib.metadata import version, PackageNotFoundError
4
+
# Start from the development placeholder and overwrite it only when the
# "graphora" distribution metadata can be found.
__version__ = "0.0.0-dev"
try:
    __version__ = version("graphora")
except PackageNotFoundError:
    pass
@@ -0,0 +1,380 @@
1
+ """API Manager - handles downloading and managing graphora-api.
2
+
3
+ Automatically downloads and caches graphora-api from GitHub for embedded mode.
4
+ """
5
+
6
+ import os
7
+ import shutil
8
+ import subprocess
9
+ import sys
10
+ import tempfile
11
+ import zipfile
12
+ from pathlib import Path
13
+ from typing import Optional, Tuple
14
+
15
+ import requests
16
+
17
+ from graphora.cli.config import CONFIG_DIR, get_config_value, set_config_value
18
+
19
+
# --- GitHub source of graphora-api -----------------------------------------
GITHUB_REPO = "graphora/graphora-api"
GITHUB_API_URL = f"https://api.github.com/repos/{GITHUB_REPO}"
DEFAULT_BRANCH = "main"

# --- Local cache layout -----------------------------------------------------
# The downloaded archive is extracted below this directory.
API_CACHE_DIR = CONFIG_DIR / "api"
# Name of the marker file touched inside the cached checkout once
# dependency installation has run.
DEPS_INSTALLED_MARKER = ".deps_installed"

# --- Version channels -------------------------------------------------------
VERSION_CHANNELS = dict(
    stable="latest",    # Latest stable release
    nightly="nightly",  # Latest nightly build
    main="main",        # Main branch (development)
)
35
+
36
+
def get_api_path() -> Optional[Path]:
    """Resolve the graphora-api directory to use.

    A configured ``embedded.api_path`` wins when it points at an existing
    directory; otherwise the cached download is used.

    Returns:
        The graphora-api directory, or None when neither location exists.
    """
    override = get_config_value("embedded.api_path")
    if override:
        override_dir = Path(os.path.expanduser(override))
        if override_dir.is_dir():
            return override_dir

    # No usable override: fall back to the cached checkout, if any.
    cached_dir = get_cached_api_path()
    return cached_dir if cached_dir and cached_dir.is_dir() else None
56
+
57
+
def get_cached_api_path() -> Optional[Path]:
    """Return the first ``graphora-api*`` directory in the cache, or None."""
    if not API_CACHE_DIR.exists():
        return None

    # The extracted archive directory is recognised by its name prefix.
    extracted_dirs = (
        entry
        for entry in API_CACHE_DIR.iterdir()
        if entry.is_dir() and entry.name.startswith("graphora-api")
    )
    return next(extracted_dirs, None)
69
+
70
+
def get_installed_version() -> Optional[str]:
    """Return the value stored under ``embedded.installed_version`` in the config."""
    recorded_version = get_config_value("embedded.installed_version")
    return recorded_version
74
+
75
+
def get_latest_version(channel: str = "stable") -> str:
    """Look up the newest version for a channel via the GitHub API.

    Any failure (network error, non-200 status, unexpected payload) is
    swallowed and a fallback name is returned instead.

    Args:
        channel: "main", "nightly", or anything else (treated as stable).

    Returns:
        A release tag when one is found; otherwise "nightly" for the nightly
        channel and the default branch name for every other channel.
    """
    if channel == "main":
        return DEFAULT_BRANCH

    accept_json = {"Accept": "application/vnd.github.v3+json"}

    if channel == "nightly":
        # Scan the release list for the first tag named "nightly-...".
        try:
            reply = requests.get(
                f"{GITHUB_API_URL}/releases",
                timeout=10,
                headers=accept_json,
                params={"per_page": 20},
            )
            if reply.status_code == 200:
                for entry in reply.json():
                    tag_name = entry.get("tag_name", "")
                    if tag_name.startswith("nightly-"):
                        return tag_name
        except Exception:
            pass
        # Nothing matched (or the lookup failed): use the plain "nightly" name.
        return "nightly"

    # Every other channel: ask GitHub for its "latest" release.
    try:
        reply = requests.get(
            f"{GITHUB_API_URL}/releases/latest",
            timeout=10,
            headers=accept_json,
        )
        latest_tag = reply.json().get("tag_name") if reply.status_code == 200 else None
        if latest_tag:
            return latest_tag
    except Exception:
        pass

    # No release information available: use the default branch.
    return DEFAULT_BRANCH
125
+
126
+
def is_api_available() -> bool:
    """Report whether a graphora-api directory exists and contains its key files."""
    api_root = get_api_path()
    if not api_root:
        return False

    # Files whose presence is treated as proof of a usable checkout.
    required_files = (
        "app/__init__.py",
        "app/services/transform/graph_transformer.py",
    )
    return all((api_root / relative).exists() for relative in required_files)
144
+
145
+
def is_deps_installed() -> bool:
    """Report whether the dependency marker file exists in the cached checkout."""
    cached_dir = get_cached_api_path()
    # Without a cached checkout there is nowhere for the marker to live.
    return bool(cached_dir) and (cached_dir / DEPS_INSTALLED_MARKER).exists()
152
+
153
+
def download_api(
    version: Optional[str] = None,
    channel: str = "main",
    force: bool = False,
    progress_callback=None,
) -> Tuple[bool, str]:
    """Download graphora-api from GitHub into the local cache.

    The archive is streamed to a temporary file, opened as a zip (which
    validates it), and only then is the previous cached copy removed and the
    new one extracted. The temporary file is deleted on every exit path.

    Args:
        version: Tag or branch to download. When empty, the configured
            ``embedded.version`` is used, else the latest version of the
            configured ``embedded.channel`` (falling back to ``channel``).
        channel: Channel used when no version and no configured channel exist.
        force: Re-download even if the same version is already installed.
        progress_callback: Optional callable receiving progress messages.

    Returns:
        Tuple of (success, message). Failures are reported through the
        tuple rather than raised.
    """
    if progress_callback:
        progress_callback("Checking for updates...")

    # Determine version to download
    if not version:
        configured_channel = get_config_value("embedded.channel") or channel
        version = get_config_value("embedded.version") or get_latest_version(configured_channel)

    # Skip the download when this exact version is already usable
    installed_version = get_installed_version()
    if not force and installed_version == version and is_api_available():
        return True, f"graphora-api {version} already installed"

    if progress_callback:
        progress_callback(f"Downloading graphora-api {version}...")

    API_CACHE_DIR.mkdir(parents=True, exist_ok=True)

    # These names are fetched from refs/heads; anything else from refs/tags
    # (e.g. YYYY-MM-vX.Y.Z, vX.Y.Z, nightly-YYYYMMDD).
    if version in ["main", "nightly", "latest"]:
        zip_url = f"https://github.com/{GITHUB_REPO}/archive/refs/heads/{version}.zip"
    else:
        zip_url = f"https://github.com/{GITHUB_REPO}/archive/refs/tags/{version}.zip"

    tmp_path = None
    try:
        # Context manager guarantees the streamed connection is released.
        with requests.get(zip_url, stream=True, timeout=60) as response:
            response.raise_for_status()

            total_size = int(response.headers.get("content-length", 0))
            downloaded = 0

            with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp_file:
                # Record the path immediately so cleanup works even if the
                # download is interrupted half-way.
                tmp_path = tmp_file.name
                for chunk in response.iter_content(chunk_size=8192):
                    tmp_file.write(chunk)
                    downloaded += len(chunk)
                    if progress_callback and total_size:
                        pct = int(downloaded / total_size * 100)
                        progress_callback(f"Downloading... {pct}%")

        if progress_callback:
            progress_callback("Extracting...")

        # Opening the archive raises BadZipFile for a corrupt download
        # BEFORE the existing cached copy is touched.
        with zipfile.ZipFile(tmp_path, "r") as zip_ref:
            if API_CACHE_DIR.exists():
                for item in API_CACHE_DIR.iterdir():
                    if item.is_dir() and item.name.startswith("graphora-api"):
                        shutil.rmtree(item)

            zip_ref.extractall(API_CACHE_DIR)

        # Save installed version
        set_config_value("embedded.installed_version", version)

        return True, f"Successfully downloaded graphora-api {version}"

    except requests.exceptions.RequestException as e:
        return False, f"Download failed: {e}"
    except zipfile.BadZipFile:
        return False, "Downloaded file is not a valid zip"
    except Exception as e:
        return False, f"Installation failed: {e}"
    finally:
        # Remove the temporary archive on success and on failure alike.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
244
+
245
+
def _extract_deps_from_pyproject(pyproject_path: Path) -> list:
    """Read ``[project].dependencies`` from a pyproject.toml file.

    This avoids `pip install -e .` which would register the `app` package
    with incomplete subpackage discovery (packages=["app"]), shadowing
    the sys.path-based imports that the CLI relies on.

    Args:
        pyproject_path: Path to the pyproject.toml file to read.

    Returns:
        The dependency strings as a list. An empty list is returned when no
        TOML parser is importable, the file cannot be read or parsed, or the
        ``project`` table / ``dependencies`` array is missing or has an
        unexpected type.
    """
    try:
        import tomllib
    except ImportError:
        try:
            import tomli as tomllib
        except ImportError:
            # No TOML parser available; nothing can be extracted.
            return []

    try:
        with open(pyproject_path, "rb") as f:
            data = tomllib.load(f)
    except (OSError, ValueError):
        # Unreadable file or invalid TOML (decode errors are ValueErrors).
        return []

    project = data.get("project")
    if not isinstance(project, dict):
        return []

    deps = project.get("dependencies")
    if not isinstance(deps, list):
        # The caller concatenates this onto a pip argv list, so anything
        # other than a list would raise there.
        return []

    # Keep only real requirement strings; other entries cannot be passed to pip.
    return [dep for dep in deps if isinstance(dep, str) and dep.strip()]
268
+
269
+
def install_dependencies(progress_callback=None) -> Tuple[bool, str]:
    """Install the Python dependencies of the cached graphora-api.

    IMPORTANT: only the dependencies are installed, NOT the package itself.
    The API's pyproject.toml uses packages=["app"] which only registers the
    top-level app package without subpackages. This shadows the sys.path-based
    import that the CLI uses, breaking imports like app.services.document_parser.

    A requirements.txt is preferred and its failure is fatal. Without one,
    the dependency list is read from pyproject.toml and a pip failure is
    reported through the callback only.

    Args:
        progress_callback: Optional callable receiving progress messages.

    Returns:
        Tuple of (success, message).
    """
    def notify(text):
        if progress_callback:
            progress_callback(text)

    api_root = get_cached_api_path()
    if not api_root:
        return False, "graphora-api not downloaded"

    pip_install = [sys.executable, "-m", "pip", "install"]
    requirements_file = api_root / "requirements.txt"

    if requirements_file.exists():
        notify("Installing dependencies...")
        try:
            subprocess.check_call(
                pip_install + ["-r", str(requirements_file), "-q"],
                stdout=subprocess.DEVNULL,
            )
        except subprocess.CalledProcessError as e:
            return False, f"Failed to install dependencies: {e}"
    else:
        # No requirements.txt: take the list from pyproject.toml WITHOUT
        # installing the package.
        pyproject_file = api_root / "pyproject.toml"
        deps = _extract_deps_from_pyproject(pyproject_file) if pyproject_file.exists() else []
        if deps:
            notify("Installing API dependencies...")
            try:
                subprocess.check_call(
                    pip_install + deps + ["-q"],
                    stdout=subprocess.DEVNULL,
                )
            except subprocess.CalledProcessError:
                # Non-fatal: CLI extras already include key deps (aiofiles, google-genai)
                notify("Some API dependencies could not be installed (non-fatal)")

    # Record that installation has run for this checkout.
    (api_root / DEPS_INSTALLED_MARKER).touch()

    return True, "Dependencies installed successfully"
323
+
324
+
def setup_api(
    version: Optional[str] = None,
    channel: str = "main",
    force: bool = False,
    install_deps: bool = True,
    progress_callback=None,
) -> Tuple[bool, str]:
    """Download graphora-api and, if requested, install its dependencies.

    Args:
        version: Version to install; forwarded to ``download_api``.
        channel: Version channel; forwarded to ``download_api``.
        force: Force re-download; forwarded to ``download_api``.
        install_deps: Install Python dependencies unless the marker file
            shows they were installed already.
        progress_callback: Optional callable receiving progress messages.

    Returns:
        Tuple of (success, message). The first failing step's message is
        returned unchanged.
    """
    def notify(text):
        if progress_callback:
            progress_callback(text)

    # Step 1: fetch the sources.
    downloaded, download_message = download_api(version, channel, force, progress_callback)
    if not downloaded:
        return False, download_message

    # Step 2: dependencies, skipped when already marked as installed.
    if install_deps and not is_deps_installed():
        notify("Installing dependencies...")
        installed, dep_message = install_dependencies(progress_callback)
        if not installed:
            return False, dep_message

    notify("Setup complete!")
    return True, "graphora-api setup complete"
364
+
365
+
def add_api_to_path() -> bool:
    """Put the graphora-api directory at the front of ``sys.path``.

    Returns:
        False when no graphora-api directory is available, True otherwise
        (including when the entry was already present).
    """
    api_root = get_api_path()
    if not api_root:
        return False

    entry = str(api_root)
    already_present = entry in sys.path
    if not already_present:
        sys.path.insert(0, entry)
    return True
@@ -0,0 +1 @@
1
+ """CLI commands."""
@@ -0,0 +1,225 @@
1
+ """Config command - manage CLI configuration."""
2
+
3
+ import typer
4
+ from rich.console import Console
5
+ from rich.table import Table
6
+ from rich.progress import Progress, SpinnerColumn, TextColumn
7
+ from rich import print as rprint
8
+
9
+ from graphora.cli.config import (
10
+ load_config,
11
+ get_config_value,
12
+ set_config_value,
13
+ CONFIG_FILE,
14
+ ensure_config_dir,
15
+ is_embedded_available,
16
+ )
17
+ from graphora.cli.api_manager import (
18
+ is_api_available,
19
+ get_installed_version,
20
+ setup_api,
21
+ )
22
+
# Typer application object that the commands in this module register on.
app = typer.Typer(
    help="Manage Graphora CLI configuration",
)
# Rich console shared by the table and progress output below.
console = Console()
25
+
26
+
@app.command("set")
def config_set(
    key: str = typer.Argument(..., help="Config key (e.g., llm.api_key)"),
    value: str = typer.Argument(..., help="Value to set"),
):
    """Set a configuration value.

    Examples:
        graphora config set llm.api_key "your-api-key"
        graphora config set llm.model "gemini-1.5-pro"
        graphora config set defaults.mode "remote"
        graphora config set api.url "https://api.graphora.io"
    """
    # Persist first, then echo the value back with secrets masked.
    set_config_value(key, value)
    shown = _mask_sensitive(key, value)
    rprint(f"[green]Set {key} = {shown}[/green]")
42
+
43
+
@app.command("get")
def config_get(
    key: str = typer.Argument(..., help="Config key to get"),
):
    """Get a configuration value.

    Examples:
        graphora config get llm.model
        graphora config get defaults.mode
    """
    stored = get_config_value(key)

    if stored is not None:
        # Print the value with secrets masked.
        rprint(f"{key} = {_mask_sensitive(key, stored)}")
        return

    # Unset key: report it and exit with status 1.
    rprint(f"[yellow]Key '{key}' is not set[/yellow]")
    raise typer.Exit(1)
61
+
62
+
@app.command("show")
def config_show():
    """Show all configuration.

    Displays all config values with sensitive data masked.
    """
    def flatten(section: dict, prefix: str = ""):
        # Walk nested dicts depth-first, yielding (dotted key, leaf value).
        for name, entry in section.items():
            dotted = f"{prefix}{name}" if prefix else name
            if isinstance(entry, dict):
                yield from flatten(entry, f"{dotted}.")
            else:
                yield dotted, entry

    config = load_config()

    table = Table(title="Graphora Configuration")
    table.add_column("Key", style="cyan")
    table.add_column("Value", style="green")

    for dotted, entry in flatten(config):
        table.add_row(dotted, str(_mask_sensitive(dotted, entry)))

    # Blank separator row, then the embedded-mode availability status.
    table.add_row("", "")
    if is_embedded_available():
        embedded_status = "[green]available[/green]"
    else:
        embedded_status = "[yellow]not installed[/yellow]"
    table.add_row("[dim]embedded mode[/dim]", embedded_status)

    console.print(table)
    rprint(f"\n[dim]Config file: {CONFIG_FILE}[/dim]")
93
+
94
+
@app.command("path")
def config_path():
    """Show the config file path."""
    # Calls ensure_config_dir() before printing the path.
    ensure_config_dir()
    location = f"[cyan]{CONFIG_FILE}[/cyan]"
    rprint(location)
100
+
101
+
@app.command("init")
def config_init(
    mode: str = typer.Option(
        None, "--mode", "-m", help="CLI mode: 'embedded' or 'remote'"
    ),
    api_key: str = typer.Option(
        None, "--api-key", "-k", help="LLM API key (for embedded mode)"
    ),
    api_path: str = typer.Option(
        None, "--api-path", "-p", help="Custom path to graphora-api (optional, auto-downloads by default)"
    ),
    api_url: str = typer.Option(
        None, "--api-url", "-u", help="Remote API URL (for remote mode)"
    ),
    auth_token: str = typer.Option(
        None, "--auth-token", "-t", help="Auth token (for remote mode)"
    ),
    skip_download: bool = typer.Option(
        False, "--skip-download", help="Skip auto-download of graphora-api"
    ),
):
    """Initialize configuration interactively.

    For embedded mode, graphora-api is automatically downloaded and cached.
    You only need to provide your Gemini API key.

    Examples:
        graphora config init --api-key "your-gemini-key"
        graphora config init --mode remote --api-url "https://api.graphora.io" --auth-token "token"
        graphora config init --api-path "/path/to/local/graphora-api"  # Use local clone
    """
    import os

    # Determine mode: prompt only when it was not given on the command line.
    if mode is None:
        rprint("[cyan]Select execution mode:[/cyan]")
        rprint("  [dim]embedded[/dim] - Run locally with Gemini API (recommended, auto-downloads)")
        rprint("  [dim]remote[/dim]   - Use hosted Graphora API (requires auth token)")
        mode = typer.prompt(
            "\nMode (embedded/remote)",
            default="embedded",
        )
    if mode not in ["embedded", "remote"]:
        rprint(f"[red]Invalid mode: {mode}. Must be 'embedded' or 'remote'[/red]")
        raise typer.Exit(1)

    set_config_value("defaults.mode", mode)

    if mode == "embedded":
        if api_path:
            # A custom checkout was requested: it must already exist.
            api_path = os.path.expanduser(api_path)
            if os.path.isdir(api_path):
                set_config_value("embedded.api_path", api_path)
                rprint(f"[green]Using custom graphora-api path: {api_path}[/green]")
            else:
                rprint(f"[red]Directory not found: {api_path}[/red]")
                raise typer.Exit(1)
        elif not skip_download and not is_api_available():
            # Auto-download graphora-api
            rprint("\n[cyan]Setting up graphora-api...[/cyan]")

            with Progress(
                SpinnerColumn(),
                TextColumn("[progress.description]{task.description}"),
                console=console,
            ) as progress:
                task = progress.add_task("Downloading graphora-api...", total=None)

                def update_progress(msg):
                    progress.update(task, description=msg)

                success, message = setup_api(progress_callback=update_progress)

            if success:
                rprint(f"[green]{message}[/green]")
            else:
                rprint(f"[red]{message}[/red]")
                rprint("\nYou can manually specify a path with --api-path")
                raise typer.Exit(1)

        # Get API key. hide_input keeps the secret from being echoed to the
        # terminal (and from ending up in scrollback or screen recordings).
        if api_key is None:
            rprint("")
            api_key = typer.prompt("Enter your Gemini API key", hide_input=True)
        set_config_value("llm.api_key", api_key)

        # Always update model to current default (fixes stale configs with deprecated models)
        from graphora.cli.config import DEFAULT_CONFIG
        set_config_value("llm.model", DEFAULT_CONFIG["llm"]["model"])
        set_config_value("llm.provider", DEFAULT_CONFIG["llm"]["provider"])

        rprint("\n[green]Embedded mode configuration complete![/green]")
        installed = get_installed_version()
        if installed:
            rprint(f"  graphora-api: {installed}")
    else:
        if api_url is None:
            api_url = typer.prompt(
                "Enter the Graphora API URL",
                default="https://api.graphora.io"
            )
        if auth_token is None:
            # The auth token is a secret as well: do not echo it.
            auth_token = typer.prompt("Enter your auth token", hide_input=True)

        set_config_value("api.url", api_url)
        set_config_value("api.auth_token", auth_token)
        rprint("\n[green]Remote mode configuration complete![/green]")

    rprint("\nYou can now run:")
    rprint("  [cyan]graphora extract document.pdf -o graph.json[/cyan]")
214
+
def _mask_sensitive(key: str, value) -> str:
    """Return a display string for a config value, masking secrets.

    A key is treated as sensitive when its lowercase form contains
    "api_key", "password", "secret", or "token".

    Args:
        key: Config key (dotted path) the value belongs to.
        value: The stored value; may be None or a non-string.

    Returns:
        "(not set)" for None; for a non-empty sensitive value either
        "<first 4>...<last 4>" (strings of 16+ characters) or "****";
        otherwise ``str(value)``.
    """
    sensitive_keys = ("api_key", "password", "secret", "token")
    # Only show the edges when at least half of the secret stays hidden.
    # With the old threshold a 9-character secret exposed 8 of its characters.
    min_length_for_preview = 16

    if value is None:
        return "(not set)"

    lowered_key = key.lower()
    if value and any(marker in lowered_key for marker in sensitive_keys):
        if isinstance(value, str) and len(value) >= min_length_for_preview:
            return f"{value[:4]}...{value[-4:]}"
        return "****"

    return str(value)