gibram 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gibram-0.1.0/PKG-INFO +314 -0
- gibram-0.1.0/README.md +276 -0
- gibram-0.1.0/gibram/__init__.py +51 -0
- gibram-0.1.0/gibram/_client.py +253 -0
- gibram-0.1.0/gibram/_connection.py +81 -0
- gibram-0.1.0/gibram/_protocol.py +301 -0
- gibram-0.1.0/gibram/chunkers/__init__.py +6 -0
- gibram-0.1.0/gibram/chunkers/base.py +21 -0
- gibram-0.1.0/gibram/chunkers/token.py +86 -0
- gibram-0.1.0/gibram/embedders/__init__.py +6 -0
- gibram-0.1.0/gibram/embedders/base.py +34 -0
- gibram-0.1.0/gibram/embedders/openai.py +127 -0
- gibram-0.1.0/gibram/exceptions.py +61 -0
- gibram-0.1.0/gibram/extractors/__init__.py +6 -0
- gibram-0.1.0/gibram/extractors/base.py +24 -0
- gibram-0.1.0/gibram/extractors/openai.py +167 -0
- gibram-0.1.0/gibram/indexer.py +479 -0
- gibram-0.1.0/gibram/proto/__init__.py +3 -0
- gibram-0.1.0/gibram/proto/gibram_pb2.py +173 -0
- gibram-0.1.0/gibram/py.typed +0 -0
- gibram-0.1.0/gibram/types.py +78 -0
- gibram-0.1.0/gibram.egg-info/PKG-INFO +314 -0
- gibram-0.1.0/gibram.egg-info/SOURCES.txt +26 -0
- gibram-0.1.0/gibram.egg-info/dependency_links.txt +1 -0
- gibram-0.1.0/gibram.egg-info/requires.txt +11 -0
- gibram-0.1.0/gibram.egg-info/top_level.txt +1 -0
- gibram-0.1.0/pyproject.toml +84 -0
- gibram-0.1.0/setup.cfg +4 -0
gibram-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: gibram
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: GraphRAG-style Python SDK for GibRAM - Graph in-Buffer Retrieval & Associative Memory
|
|
5
|
+
Author-email: GibRAM Team <support@gibram.io>
|
|
6
|
+
Maintainer-email: GibRAM Team <support@gibram.io>
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/gibram-io/gibram
|
|
9
|
+
Project-URL: Documentation, https://gibram.io/docs/python-sdk
|
|
10
|
+
Project-URL: Repository, https://github.com/gibram-io/gibram
|
|
11
|
+
Project-URL: Issues, https://github.com/gibram-io/gibram/issues
|
|
12
|
+
Keywords: rag,graphrag,graph,knowledge-graph,vector-search,llm,ai,embeddings,entity-extraction
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
25
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
26
|
+
Classifier: Typing :: Typed
|
|
27
|
+
Requires-Python: >=3.8
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
Requires-Dist: protobuf>=3.20.0
|
|
30
|
+
Requires-Dist: openai>=1.0.0
|
|
31
|
+
Requires-Dist: tqdm>=4.65.0
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
34
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
35
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
36
|
+
Provides-Extra: anthropic
|
|
37
|
+
Requires-Dist: anthropic>=0.18.0; extra == "anthropic"
|
|
38
|
+
|
|
39
|
+
# GibRAM Python SDK v0.1.0
|
|
40
|
+
|
|
41
|
+
GraphRAG-style knowledge graph indexing with automatic entity extraction, relationship detection, and community discovery.
|
|
42
|
+
|
|
43
|
+
## Installation
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
cd sdk/python
|
|
47
|
+
pip install -e .
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Quick Start
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from gibram import GibRAMIndexer
|
|
54
|
+
|
|
55
|
+
# Initialize indexer with OpenAI
|
|
56
|
+
indexer = GibRAMIndexer(
|
|
57
|
+
session_id="my-project",
|
|
58
|
+
llm_api_key="sk-...", # or set OPENAI_API_KEY env
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
# Index documents (automatic chunking, extraction, embedding)
|
|
62
|
+
stats = indexer.index_documents([
|
|
63
|
+
"Einstein was born in 1879 in Ulm, Germany.",
|
|
64
|
+
"He developed the theory of relativity in 1905.",
|
|
65
|
+
"Einstein received the Nobel Prize in Physics in 1921.",
|
|
66
|
+
])
|
|
67
|
+
|
|
68
|
+
print(f"Indexed {stats.entities_extracted} entities in {stats.indexing_time_seconds:.2f}s")
|
|
69
|
+
|
|
70
|
+
# Query knowledge graph
|
|
71
|
+
result = indexer.query("Einstein's achievements", top_k=5)
|
|
72
|
+
|
|
73
|
+
for entity in result.entities:
|
|
74
|
+
print(f"{entity.title} ({entity.type}): {entity.description}")
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Configuration
|
|
78
|
+
|
|
79
|
+
### Environment Variables
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
export OPENAI_API_KEY="sk-..."
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Initialization Parameters
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
indexer = GibRAMIndexer(
|
|
89
|
+
# Required
|
|
90
|
+
session_id="unique-project-id",
|
|
91
|
+
|
|
92
|
+
# Server connection
|
|
93
|
+
host="localhost",
|
|
94
|
+
port=6161,
|
|
95
|
+
|
|
96
|
+
# LLM configuration
|
|
97
|
+
llm_provider="openai", # Only OpenAI supported in v0.1.0
|
|
98
|
+
llm_api_key="sk-...", # Auto-detect from OPENAI_API_KEY
|
|
99
|
+
llm_model="gpt-4o", # GPT-4o recommended
|
|
100
|
+
|
|
101
|
+
# Embedding configuration
|
|
102
|
+
embedding_provider="openai",
|
|
103
|
+
embedding_model="text-embedding-3-small",
|
|
104
|
+
embedding_dimensions=1536, # Must match server config
|
|
105
|
+
|
|
106
|
+
# Chunking configuration
|
|
107
|
+
chunk_size=512, # Tokens per chunk
|
|
108
|
+
chunk_overlap=50, # Overlap between chunks
|
|
109
|
+
|
|
110
|
+
# Community detection
|
|
111
|
+
auto_detect_communities=True, # Auto-run after indexing
|
|
112
|
+
community_resolution=1.0, # Leiden algorithm resolution
|
|
113
|
+
)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## API Reference
|
|
117
|
+
|
|
118
|
+
### GibRAMIndexer
|
|
119
|
+
|
|
120
|
+
Main class for indexing and querying.
|
|
121
|
+
|
|
122
|
+
#### `index_documents(documents, batch_size=10, show_progress=True) -> IndexStats`
|
|
123
|
+
|
|
124
|
+
Index documents into the knowledge graph.
|
|
125
|
+
|
|
126
|
+
**Arguments:**
|
|
127
|
+
- `documents`: List of strings or dicts `{"id": ..., "text": ..., "metadata": ...}`
|
|
128
|
+
- `batch_size`: Batch size for LLM/API calls (default: 10)
|
|
129
|
+
- `show_progress`: Show progress bar (default: True)
|
|
130
|
+
|
|
131
|
+
**Returns:** `IndexStats` with counts and timing
|
|
132
|
+
|
|
133
|
+
**Pipeline:**
|
|
134
|
+
1. Chunk documents → TextUnits
|
|
135
|
+
2. Extract entities & relationships (LLM)
|
|
136
|
+
3. Generate embeddings
|
|
137
|
+
4. Store in graph
|
|
138
|
+
5. Link entities to text units
|
|
139
|
+
6. Detect communities (if enabled)
|
|
140
|
+
|
|
141
|
+
**Example:**
|
|
142
|
+
```python
|
|
143
|
+
stats = indexer.index_documents([
|
|
144
|
+
{"id": "doc1", "text": "...", "metadata": {"source": "wiki"}},
|
|
145
|
+
{"id": "doc2", "text": "..."},
|
|
146
|
+
])
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
#### `query(query, mode="local", top_k=10, include_entities=True, include_text_units=True, include_communities=False) -> QueryResult`
|
|
150
|
+
|
|
151
|
+
Query the knowledge graph.
|
|
152
|
+
|
|
153
|
+
**Arguments:**
|
|
154
|
+
- `query`: Natural language query
|
|
155
|
+
- `mode`: Query mode (v0.1.0 only supports "local")
|
|
156
|
+
- `top_k`: Number of results (default: 10)
|
|
157
|
+
- `include_entities`: Include entity results
|
|
158
|
+
- `include_text_units`: Include text unit results
|
|
159
|
+
- `include_communities`: Include community results
|
|
160
|
+
|
|
161
|
+
**Returns:** `QueryResult` with scored results
|
|
162
|
+
|
|
163
|
+
**Example:**
|
|
164
|
+
```python
|
|
165
|
+
result = indexer.query("machine learning applications", top_k=5)
|
|
166
|
+
|
|
167
|
+
for entity in result.entities:
|
|
168
|
+
print(f"{entity.title}: {entity.score:.3f}")
|
|
169
|
+
|
|
170
|
+
for text_unit in result.text_units:
|
|
171
|
+
print(f"{text_unit.content[:100]}... (score: {text_unit.score:.3f})")
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
#### `get_stats() -> IndexStats`
|
|
175
|
+
|
|
176
|
+
Get current indexing statistics.
|
|
177
|
+
|
|
178
|
+
#### `close()`
|
|
179
|
+
|
|
180
|
+
Close the connection to the server.
|
|
181
|
+
|
|
182
|
+
### Types
|
|
183
|
+
|
|
184
|
+
#### `IndexStats`
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
@dataclass
|
|
188
|
+
class IndexStats:
|
|
189
|
+
documents_indexed: int = 0
|
|
190
|
+
text_units_created: int = 0
|
|
191
|
+
entities_extracted: int = 0
|
|
192
|
+
relationships_extracted: int = 0
|
|
193
|
+
communities_detected: int = 0
|
|
194
|
+
indexing_time_seconds: float = 0.0
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
#### `QueryResult`
|
|
198
|
+
|
|
199
|
+
```python
|
|
200
|
+
@dataclass
|
|
201
|
+
class QueryResult:
|
|
202
|
+
entities: List[ScoredEntity]
|
|
203
|
+
text_units: List[ScoredTextUnit]
|
|
204
|
+
communities: List[ScoredCommunity]
|
|
205
|
+
execution_time_ms: float
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
#### `ScoredEntity`
|
|
209
|
+
|
|
210
|
+
```python
|
|
211
|
+
@dataclass
|
|
212
|
+
class ScoredEntity:
|
|
213
|
+
id: int
|
|
214
|
+
title: str
|
|
215
|
+
type: str
|
|
216
|
+
description: str
|
|
217
|
+
score: float # Similarity score
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
### Exceptions
|
|
221
|
+
|
|
222
|
+
All exceptions inherit from `GibRAMError`:
|
|
223
|
+
|
|
224
|
+
- `ConnectionError`: Server connection failed
|
|
225
|
+
- `TimeoutError`: Operation timed out
|
|
226
|
+
- `ProtocolError`: Protocol encoding/decoding error
|
|
227
|
+
- `ServerError`: Server returned error
|
|
228
|
+
- `NotFoundError`: Resource not found
|
|
229
|
+
- `ValidationError`: Input validation failed
|
|
230
|
+
- `ExtractionError`: LLM extraction failed
|
|
231
|
+
- `EmbeddingError`: Embedding generation failed
|
|
232
|
+
- `ConfigurationError`: Invalid configuration
|
|
233
|
+
|
|
234
|
+
## Advanced Usage
|
|
235
|
+
|
|
236
|
+
### Custom Extractors
|
|
237
|
+
|
|
238
|
+
Implement `BaseExtractor` for custom entity/relationship extraction:
|
|
239
|
+
|
|
240
|
+
```python
|
|
241
|
+
from gibram.extractors import BaseExtractor
|
|
242
|
+
from gibram.types import ExtractedEntity, ExtractedRelationship
|
|
243
|
+
|
|
244
|
+
class MyExtractor(BaseExtractor):
|
|
245
|
+
def extract(self, text: str) -> tuple[list[ExtractedEntity], list[ExtractedRelationship]]:
|
|
246
|
+
# Your custom logic
|
|
247
|
+
entities = [...]
|
|
248
|
+
relationships = [...]
|
|
249
|
+
return entities, relationships
|
|
250
|
+
|
|
251
|
+
indexer = GibRAMIndexer(
|
|
252
|
+
session_id="custom",
|
|
253
|
+
extractor=MyExtractor(),
|
|
254
|
+
embedder=..., # Still need embedder
|
|
255
|
+
)
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
### Custom Embedders
|
|
259
|
+
|
|
260
|
+
Implement `BaseEmbedder` for custom embeddings:
|
|
261
|
+
|
|
262
|
+
```python
|
|
263
|
+
from gibram.embedders import BaseEmbedder
|
|
264
|
+
|
|
265
|
+
class MyEmbedder(BaseEmbedder):
|
|
266
|
+
def embed(self, texts: list[str]) -> list[list[float]]:
|
|
267
|
+
# Your custom logic
|
|
268
|
+
return [[0.1, 0.2, ...], ...]
|
|
269
|
+
|
|
270
|
+
def embed_single(self, text: str) -> list[float]:
|
|
271
|
+
return self.embed([text])[0]
|
|
272
|
+
|
|
273
|
+
indexer = GibRAMIndexer(
|
|
274
|
+
session_id="custom",
|
|
275
|
+
embedder=MyEmbedder(),
|
|
276
|
+
)
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
### Context Manager
|
|
280
|
+
|
|
281
|
+
Use a context manager for automatic cleanup:
|
|
282
|
+
|
|
283
|
+
```python
|
|
284
|
+
with GibRAMIndexer(session_id="project") as indexer:
|
|
285
|
+
stats = indexer.index_documents(documents)
|
|
286
|
+
result = indexer.query("some query")
|
|
287
|
+
# Connection automatically closed
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
## Requirements
|
|
291
|
+
|
|
292
|
+
- Python 3.8+
|
|
293
|
+
- GibRAM server running (Docker recommended)
|
|
294
|
+
- OpenAI API key (for extraction & embeddings)
|
|
295
|
+
|
|
296
|
+
## Server Setup
|
|
297
|
+
|
|
298
|
+
Start the GibRAM server with Docker:
|
|
299
|
+
|
|
300
|
+
```bash
|
|
301
|
+
docker run -d \
|
|
302
|
+
--name gibram-server \
|
|
303
|
+
-p 6161:6161 \
|
|
304
|
+
-e EMBEDDING_DIM=1536 \
|
|
305
|
+
gibram:latest
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
## License
|
|
309
|
+
|
|
310
|
+
MIT
|
|
311
|
+
|
|
312
|
+
## Version
|
|
313
|
+
|
|
314
|
+
v0.1.0 - Initial release with OpenAI extraction & embeddings
|
gibram-0.1.0/README.md
ADDED
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
# GibRAM Python SDK v0.1.0
|
|
2
|
+
|
|
3
|
+
GraphRAG-style knowledge graph indexing with automatic entity extraction, relationship detection, and community discovery.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
cd sdk/python
|
|
9
|
+
pip install -e .
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
## Quick Start
|
|
13
|
+
|
|
14
|
+
```python
|
|
15
|
+
from gibram import GibRAMIndexer
|
|
16
|
+
|
|
17
|
+
# Initialize indexer with OpenAI
|
|
18
|
+
indexer = GibRAMIndexer(
|
|
19
|
+
session_id="my-project",
|
|
20
|
+
llm_api_key="sk-...", # or set OPENAI_API_KEY env
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
# Index documents (automatic chunking, extraction, embedding)
|
|
24
|
+
stats = indexer.index_documents([
|
|
25
|
+
"Einstein was born in 1879 in Ulm, Germany.",
|
|
26
|
+
"He developed the theory of relativity in 1905.",
|
|
27
|
+
"Einstein received the Nobel Prize in Physics in 1921.",
|
|
28
|
+
])
|
|
29
|
+
|
|
30
|
+
print(f"Indexed {stats.entities_extracted} entities in {stats.indexing_time_seconds:.2f}s")
|
|
31
|
+
|
|
32
|
+
# Query knowledge graph
|
|
33
|
+
result = indexer.query("Einstein's achievements", top_k=5)
|
|
34
|
+
|
|
35
|
+
for entity in result.entities:
|
|
36
|
+
print(f"{entity.title} ({entity.type}): {entity.description}")
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Configuration
|
|
40
|
+
|
|
41
|
+
### Environment Variables
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
export OPENAI_API_KEY="sk-..."
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Initialization Parameters
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
indexer = GibRAMIndexer(
|
|
51
|
+
# Required
|
|
52
|
+
session_id="unique-project-id",
|
|
53
|
+
|
|
54
|
+
# Server connection
|
|
55
|
+
host="localhost",
|
|
56
|
+
port=6161,
|
|
57
|
+
|
|
58
|
+
# LLM configuration
|
|
59
|
+
llm_provider="openai", # Only OpenAI supported in v0.1.0
|
|
60
|
+
llm_api_key="sk-...", # Auto-detect from OPENAI_API_KEY
|
|
61
|
+
llm_model="gpt-4o", # GPT-4o recommended
|
|
62
|
+
|
|
63
|
+
# Embedding configuration
|
|
64
|
+
embedding_provider="openai",
|
|
65
|
+
embedding_model="text-embedding-3-small",
|
|
66
|
+
embedding_dimensions=1536, # Must match server config
|
|
67
|
+
|
|
68
|
+
# Chunking configuration
|
|
69
|
+
chunk_size=512, # Tokens per chunk
|
|
70
|
+
chunk_overlap=50, # Overlap between chunks
|
|
71
|
+
|
|
72
|
+
# Community detection
|
|
73
|
+
auto_detect_communities=True, # Auto-run after indexing
|
|
74
|
+
community_resolution=1.0, # Leiden algorithm resolution
|
|
75
|
+
)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## API Reference
|
|
79
|
+
|
|
80
|
+
### GibRAMIndexer
|
|
81
|
+
|
|
82
|
+
Main class for indexing and querying.
|
|
83
|
+
|
|
84
|
+
#### `index_documents(documents, batch_size=10, show_progress=True) -> IndexStats`
|
|
85
|
+
|
|
86
|
+
Index documents into the knowledge graph.
|
|
87
|
+
|
|
88
|
+
**Arguments:**
|
|
89
|
+
- `documents`: List of strings or dicts `{"id": ..., "text": ..., "metadata": ...}`
|
|
90
|
+
- `batch_size`: Batch size for LLM/API calls (default: 10)
|
|
91
|
+
- `show_progress`: Show progress bar (default: True)
|
|
92
|
+
|
|
93
|
+
**Returns:** `IndexStats` with counts and timing
|
|
94
|
+
|
|
95
|
+
**Pipeline:**
|
|
96
|
+
1. Chunk documents → TextUnits
|
|
97
|
+
2. Extract entities & relationships (LLM)
|
|
98
|
+
3. Generate embeddings
|
|
99
|
+
4. Store in graph
|
|
100
|
+
5. Link entities to text units
|
|
101
|
+
6. Detect communities (if enabled)
|
|
102
|
+
|
|
103
|
+
**Example:**
|
|
104
|
+
```python
|
|
105
|
+
stats = indexer.index_documents([
|
|
106
|
+
{"id": "doc1", "text": "...", "metadata": {"source": "wiki"}},
|
|
107
|
+
{"id": "doc2", "text": "..."},
|
|
108
|
+
])
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
#### `query(query, mode="local", top_k=10, include_entities=True, include_text_units=True, include_communities=False) -> QueryResult`
|
|
112
|
+
|
|
113
|
+
Query the knowledge graph.
|
|
114
|
+
|
|
115
|
+
**Arguments:**
|
|
116
|
+
- `query`: Natural language query
|
|
117
|
+
- `mode`: Query mode (v0.1.0 only supports "local")
|
|
118
|
+
- `top_k`: Number of results (default: 10)
|
|
119
|
+
- `include_entities`: Include entity results
|
|
120
|
+
- `include_text_units`: Include text unit results
|
|
121
|
+
- `include_communities`: Include community results
|
|
122
|
+
|
|
123
|
+
**Returns:** `QueryResult` with scored results
|
|
124
|
+
|
|
125
|
+
**Example:**
|
|
126
|
+
```python
|
|
127
|
+
result = indexer.query("machine learning applications", top_k=5)
|
|
128
|
+
|
|
129
|
+
for entity in result.entities:
|
|
130
|
+
print(f"{entity.title}: {entity.score:.3f}")
|
|
131
|
+
|
|
132
|
+
for text_unit in result.text_units:
|
|
133
|
+
print(f"{text_unit.content[:100]}... (score: {text_unit.score:.3f})")
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
#### `get_stats() -> IndexStats`
|
|
137
|
+
|
|
138
|
+
Get current indexing statistics.
|
|
139
|
+
|
|
140
|
+
#### `close()`
|
|
141
|
+
|
|
142
|
+
Close the connection to the server.
|
|
143
|
+
|
|
144
|
+
### Types
|
|
145
|
+
|
|
146
|
+
#### `IndexStats`
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
@dataclass
|
|
150
|
+
class IndexStats:
|
|
151
|
+
documents_indexed: int = 0
|
|
152
|
+
text_units_created: int = 0
|
|
153
|
+
entities_extracted: int = 0
|
|
154
|
+
relationships_extracted: int = 0
|
|
155
|
+
communities_detected: int = 0
|
|
156
|
+
indexing_time_seconds: float = 0.0
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
#### `QueryResult`
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
@dataclass
|
|
163
|
+
class QueryResult:
|
|
164
|
+
entities: List[ScoredEntity]
|
|
165
|
+
text_units: List[ScoredTextUnit]
|
|
166
|
+
communities: List[ScoredCommunity]
|
|
167
|
+
execution_time_ms: float
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
#### `ScoredEntity`
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
@dataclass
|
|
174
|
+
class ScoredEntity:
|
|
175
|
+
id: int
|
|
176
|
+
title: str
|
|
177
|
+
type: str
|
|
178
|
+
description: str
|
|
179
|
+
score: float # Similarity score
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### Exceptions
|
|
183
|
+
|
|
184
|
+
All exceptions inherit from `GibRAMError`:
|
|
185
|
+
|
|
186
|
+
- `ConnectionError`: Server connection failed
|
|
187
|
+
- `TimeoutError`: Operation timed out
|
|
188
|
+
- `ProtocolError`: Protocol encoding/decoding error
|
|
189
|
+
- `ServerError`: Server returned error
|
|
190
|
+
- `NotFoundError`: Resource not found
|
|
191
|
+
- `ValidationError`: Input validation failed
|
|
192
|
+
- `ExtractionError`: LLM extraction failed
|
|
193
|
+
- `EmbeddingError`: Embedding generation failed
|
|
194
|
+
- `ConfigurationError`: Invalid configuration
|
|
195
|
+
|
|
196
|
+
## Advanced Usage
|
|
197
|
+
|
|
198
|
+
### Custom Extractors
|
|
199
|
+
|
|
200
|
+
Implement `BaseExtractor` for custom entity/relationship extraction:
|
|
201
|
+
|
|
202
|
+
```python
|
|
203
|
+
from gibram.extractors import BaseExtractor
|
|
204
|
+
from gibram.types import ExtractedEntity, ExtractedRelationship
|
|
205
|
+
|
|
206
|
+
class MyExtractor(BaseExtractor):
|
|
207
|
+
def extract(self, text: str) -> tuple[list[ExtractedEntity], list[ExtractedRelationship]]:
|
|
208
|
+
# Your custom logic
|
|
209
|
+
entities = [...]
|
|
210
|
+
relationships = [...]
|
|
211
|
+
return entities, relationships
|
|
212
|
+
|
|
213
|
+
indexer = GibRAMIndexer(
|
|
214
|
+
session_id="custom",
|
|
215
|
+
extractor=MyExtractor(),
|
|
216
|
+
embedder=..., # Still need embedder
|
|
217
|
+
)
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
### Custom Embedders
|
|
221
|
+
|
|
222
|
+
Implement `BaseEmbedder` for custom embeddings:
|
|
223
|
+
|
|
224
|
+
```python
|
|
225
|
+
from gibram.embedders import BaseEmbedder
|
|
226
|
+
|
|
227
|
+
class MyEmbedder(BaseEmbedder):
|
|
228
|
+
def embed(self, texts: list[str]) -> list[list[float]]:
|
|
229
|
+
# Your custom logic
|
|
230
|
+
return [[0.1, 0.2, ...], ...]
|
|
231
|
+
|
|
232
|
+
def embed_single(self, text: str) -> list[float]:
|
|
233
|
+
return self.embed([text])[0]
|
|
234
|
+
|
|
235
|
+
indexer = GibRAMIndexer(
|
|
236
|
+
session_id="custom",
|
|
237
|
+
embedder=MyEmbedder(),
|
|
238
|
+
)
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
### Context Manager
|
|
242
|
+
|
|
243
|
+
Use a context manager for automatic cleanup:
|
|
244
|
+
|
|
245
|
+
```python
|
|
246
|
+
with GibRAMIndexer(session_id="project") as indexer:
|
|
247
|
+
stats = indexer.index_documents(documents)
|
|
248
|
+
result = indexer.query("some query")
|
|
249
|
+
# Connection automatically closed
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
## Requirements
|
|
253
|
+
|
|
254
|
+
- Python 3.8+
|
|
255
|
+
- GibRAM server running (Docker recommended)
|
|
256
|
+
- OpenAI API key (for extraction & embeddings)
|
|
257
|
+
|
|
258
|
+
## Server Setup
|
|
259
|
+
|
|
260
|
+
Start the GibRAM server with Docker:
|
|
261
|
+
|
|
262
|
+
```bash
|
|
263
|
+
docker run -d \
|
|
264
|
+
--name gibram-server \
|
|
265
|
+
-p 6161:6161 \
|
|
266
|
+
-e EMBEDDING_DIM=1536 \
|
|
267
|
+
gibram:latest
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
## License
|
|
271
|
+
|
|
272
|
+
MIT
|
|
273
|
+
|
|
274
|
+
## Version
|
|
275
|
+
|
|
276
|
+
v0.1.0 - Initial release with OpenAI extraction & embeddings
|
gibram-0.1.0/gibram/__init__.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""GibRAM Python SDK - GraphRAG-style knowledge graph indexing."""
|
|
2
|
+
|
|
3
|
+
__version__ = "0.1.0"
|
|
4
|
+
|
|
5
|
+
from .indexer import GibRAMIndexer
|
|
6
|
+
from .types import (
|
|
7
|
+
IndexStats,
|
|
8
|
+
QueryResult,
|
|
9
|
+
ScoredEntity,
|
|
10
|
+
ScoredTextUnit,
|
|
11
|
+
ScoredCommunity,
|
|
12
|
+
ExtractedEntity,
|
|
13
|
+
ExtractedRelationship,
|
|
14
|
+
)
|
|
15
|
+
from .exceptions import (
|
|
16
|
+
GibRAMError,
|
|
17
|
+
ConnectionError,
|
|
18
|
+
TimeoutError,
|
|
19
|
+
ProtocolError,
|
|
20
|
+
ServerError,
|
|
21
|
+
NotFoundError,
|
|
22
|
+
ValidationError,
|
|
23
|
+
ExtractionError,
|
|
24
|
+
EmbeddingError,
|
|
25
|
+
ConfigurationError,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
__all__ = [
|
|
29
|
+
# Main API
|
|
30
|
+
"GibRAMIndexer",
|
|
31
|
+
# Return types
|
|
32
|
+
"IndexStats",
|
|
33
|
+
"QueryResult",
|
|
34
|
+
"ScoredEntity",
|
|
35
|
+
"ScoredTextUnit",
|
|
36
|
+
"ScoredCommunity",
|
|
37
|
+
# For advanced users
|
|
38
|
+
"ExtractedEntity",
|
|
39
|
+
"ExtractedRelationship",
|
|
40
|
+
# Exceptions
|
|
41
|
+
"GibRAMError",
|
|
42
|
+
"ConnectionError",
|
|
43
|
+
"TimeoutError",
|
|
44
|
+
"ProtocolError",
|
|
45
|
+
"ServerError",
|
|
46
|
+
"NotFoundError",
|
|
47
|
+
"ValidationError",
|
|
48
|
+
"ExtractionError",
|
|
49
|
+
"EmbeddingError",
|
|
50
|
+
"ConfigurationError",
|
|
51
|
+
]
|