gibram 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gibram-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,314 @@
1
+ Metadata-Version: 2.4
2
+ Name: gibram
3
+ Version: 0.1.0
4
+ Summary: GraphRAG-style Python SDK for GibRAM - Graph in-Buffer Retrieval & Associative Memory
5
+ Author-email: GibRAM Team <support@gibram.io>
6
+ Maintainer-email: GibRAM Team <support@gibram.io>
7
+ License: MIT
8
+ Project-URL: Homepage, https://github.com/gibram-io/gibram
9
+ Project-URL: Documentation, https://gibram.io/docs/python-sdk
10
+ Project-URL: Repository, https://github.com/gibram-io/gibram
11
+ Project-URL: Issues, https://github.com/gibram-io/gibram/issues
12
+ Keywords: rag,graphrag,graph,knowledge-graph,vector-search,llm,ai,embeddings,entity-extraction
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.8
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
25
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
26
+ Classifier: Typing :: Typed
27
+ Requires-Python: >=3.8
28
+ Description-Content-Type: text/markdown
29
+ Requires-Dist: protobuf>=3.20.0
30
+ Requires-Dist: openai>=1.0.0
31
+ Requires-Dist: tqdm>=4.65.0
32
+ Provides-Extra: dev
33
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
34
+ Requires-Dist: black>=23.0.0; extra == "dev"
35
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
36
+ Provides-Extra: anthropic
37
+ Requires-Dist: anthropic>=0.18.0; extra == "anthropic"
38
+
39
+ # GibRAM Python SDK v0.1.0
40
+
41
+ GraphRAG-style knowledge graph indexing with automatic entity extraction, relationship detection, and community discovery.
42
+
43
+ ## Installation
44
+
45
+ ```bash
46
+ cd sdk/python
47
+ pip install -e .
48
+ ```
49
+
50
+ ## Quick Start
51
+
52
+ ```python
53
+ from gibram import GibRAMIndexer
54
+
55
+ # Initialize indexer with OpenAI
56
+ indexer = GibRAMIndexer(
57
+ session_id="my-project",
58
+ llm_api_key="sk-...", # or set OPENAI_API_KEY env
59
+ )
60
+
61
+ # Index documents (automatic chunking, extraction, embedding)
62
+ stats = indexer.index_documents([
63
+ "Einstein was born in 1879 in Ulm, Germany.",
64
+ "He developed the theory of relativity in 1905.",
65
+ "Einstein received the Nobel Prize in Physics in 1921.",
66
+ ])
67
+
68
+ print(f"Indexed {stats.entities_extracted} entities in {stats.indexing_time_seconds:.2f}s")
69
+
70
+ # Query knowledge graph
71
+ result = indexer.query("Einstein's achievements", top_k=5)
72
+
73
+ for entity in result.entities:
74
+ print(f"{entity.title} ({entity.type}): {entity.description}")
75
+ ```
76
+
77
+ ## Configuration
78
+
79
+ ### Environment Variables
80
+
81
+ ```bash
82
+ export OPENAI_API_KEY="sk-..."
83
+ ```
84
+
85
+ ### Initialization Parameters
86
+
87
+ ```python
88
+ indexer = GibRAMIndexer(
89
+ # Required
90
+ session_id="unique-project-id",
91
+
92
+ # Server connection
93
+ host="localhost",
94
+ port=6161,
95
+
96
+ # LLM configuration
97
+ llm_provider="openai", # Only OpenAI supported in v0.1.0
98
+ llm_api_key="sk-...", # Falls back to the OPENAI_API_KEY env var if omitted
99
+ llm_model="gpt-4o", # GPT-4o recommended
100
+
101
+ # Embedding configuration
102
+ embedding_provider="openai",
103
+ embedding_model="text-embedding-3-small",
104
+ embedding_dimensions=1536, # Must match server config
105
+
106
+ # Chunking configuration
107
+ chunk_size=512, # Tokens per chunk
108
+ chunk_overlap=50, # Overlap between chunks
109
+
110
+ # Community detection
111
+ auto_detect_communities=True, # Auto-run after indexing
112
+ community_resolution=1.0, # Leiden algorithm resolution
113
+ )
114
+ ```
115
+
116
+ ## API Reference
117
+
118
+ ### GibRAMIndexer
119
+
120
+ Main class for indexing and querying.
121
+
122
+ #### `index_documents(documents, batch_size=10, show_progress=True) -> IndexStats`
123
+
124
+ Index documents into the knowledge graph.
125
+
126
+ **Arguments:**
127
+ - `documents`: List of strings or dicts `{"id": ..., "text": ..., "metadata": ...}`
128
+ - `batch_size`: Batch size for LLM/API calls (default: 10)
129
+ - `show_progress`: Show progress bar (default: True)
130
+
131
+ **Returns:** `IndexStats` with counts and timing
132
+
133
+ **Pipeline:**
134
+ 1. Chunk documents → TextUnits
135
+ 2. Extract entities & relationships (LLM)
136
+ 3. Generate embeddings
137
+ 4. Store in graph
138
+ 5. Link entities to text units
139
+ 6. Detect communities (if enabled)
140
+
141
+ **Example:**
142
+ ```python
143
+ stats = indexer.index_documents([
144
+ {"id": "doc1", "text": "...", "metadata": {"source": "wiki"}},
145
+ {"id": "doc2", "text": "..."},
146
+ ])
147
+ ```
148
+
149
+ #### `query(query, mode="local", top_k=10, include_entities=True, include_text_units=True, include_communities=False) -> QueryResult`
150
+
151
+ Query the knowledge graph.
152
+
153
+ **Arguments:**
154
+ - `query`: Natural language query
155
+ - `mode`: Query mode (v0.1.0 only supports "local")
156
+ - `top_k`: Number of results (default: 10)
157
+ - `include_entities`: Include entity results
158
+ - `include_text_units`: Include text unit results
159
+ - `include_communities`: Include community results
160
+
161
+ **Returns:** `QueryResult` with scored results
162
+
163
+ **Example:**
164
+ ```python
165
+ result = indexer.query("machine learning applications", top_k=5)
166
+
167
+ for entity in result.entities:
168
+ print(f"{entity.title}: {entity.score:.3f}")
169
+
170
+ for text_unit in result.text_units:
171
+ print(f"{text_unit.content[:100]}... (score: {text_unit.score:.3f})")
172
+ ```
173
+
174
+ #### `get_stats() -> IndexStats`
175
+
176
+ Get current indexing statistics.
177
+
178
+ #### `close()`
179
+
180
+ Close the connection to the server.
181
+
182
+ ### Types
183
+
184
+ #### `IndexStats`
185
+
186
+ ```python
187
+ @dataclass
188
+ class IndexStats:
189
+ documents_indexed: int = 0
190
+ text_units_created: int = 0
191
+ entities_extracted: int = 0
192
+ relationships_extracted: int = 0
193
+ communities_detected: int = 0
194
+ indexing_time_seconds: float = 0.0
195
+ ```
196
+
197
+ #### `QueryResult`
198
+
199
+ ```python
200
+ @dataclass
201
+ class QueryResult:
202
+ entities: List[ScoredEntity]
203
+ text_units: List[ScoredTextUnit]
204
+ communities: List[ScoredCommunity]
205
+ execution_time_ms: float
206
+ ```
207
+
208
+ #### `ScoredEntity`
209
+
210
+ ```python
211
+ @dataclass
212
+ class ScoredEntity:
213
+ id: int
214
+ title: str
215
+ type: str
216
+ description: str
217
+ score: float # Similarity score
218
+ ```
219
+
220
+ ### Exceptions
221
+
222
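+ All exceptions inherit from `GibRAMError`. Note that `ConnectionError` and `TimeoutError` are GibRAM-specific classes that share their names with Python builtins, so importing them from `gibram` shadows the builtins in the importing module: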
223
+
224
+ - `ConnectionError`: Server connection failed
225
+ - `TimeoutError`: Operation timed out
226
+ - `ProtocolError`: Protocol encoding/decoding error
227
+ - `ServerError`: Server returned an error
228
+ - `NotFoundError`: Resource not found
229
+ - `ValidationError`: Input validation failed
230
+ - `ExtractionError`: LLM extraction failed
231
+ - `EmbeddingError`: Embedding generation failed
232
+ - `ConfigurationError`: Invalid configuration
233
+
234
+ ## Advanced Usage
235
+
236
+ ### Custom Extractors
237
+
238
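+ Implement `BaseExtractor` for custom entity/relationship extraction (the example uses built-in generic annotations such as `tuple[...]` and `list[...]`, which require Python 3.9+; on Python 3.8 add `from __future__ import annotations` or use `typing.Tuple`/`typing.List`):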
239
+
240
+ ```python
241
+ from gibram.extractors import BaseExtractor
242
+ from gibram.types import ExtractedEntity, ExtractedRelationship
243
+
244
+ class MyExtractor(BaseExtractor):
245
+ def extract(self, text: str) -> tuple[list[ExtractedEntity], list[ExtractedRelationship]]:
246
+ # Your custom logic
247
+ entities = [...]
248
+ relationships = [...]
249
+ return entities, relationships
250
+
251
+ indexer = GibRAMIndexer(
252
+ session_id="custom",
253
+ extractor=MyExtractor(),
254
+ embedder=..., # Still need embedder
255
+ )
256
+ ```
257
+
258
+ ### Custom Embedders
259
+
260
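+ Implement `BaseEmbedder` for custom embeddings (the example uses built-in generic annotations such as `list[str]`, which require Python 3.9+; on Python 3.8 add `from __future__ import annotations` or use `typing.List`):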
261
+
262
+ ```python
263
+ from gibram.embedders import BaseEmbedder
264
+
265
+ class MyEmbedder(BaseEmbedder):
266
+ def embed(self, texts: list[str]) -> list[list[float]]:
267
+ # Your custom logic
268
+ return [[0.1, 0.2, ...], ...]
269
+
270
+ def embed_single(self, text: str) -> list[float]:
271
+ return self.embed([text])[0]
272
+
273
+ indexer = GibRAMIndexer(
274
+ session_id="custom",
275
+ embedder=MyEmbedder(),
276
+ )
277
+ ```
278
+
279
+ ### Context Manager
280
+
281
+ Use a context manager for automatic cleanup:
282
+
283
+ ```python
284
+ with GibRAMIndexer(session_id="project") as indexer:
285
+ stats = indexer.index_documents(documents)
286
+ result = indexer.query("some query")
287
+ # Connection automatically closed
288
+ ```
289
+
290
+ ## Requirements
291
+
292
+ - Python 3.8+
293
+ - GibRAM server running (Docker recommended)
294
+ - OpenAI API key (for extraction & embeddings)
295
+
296
+ ## Server Setup
297
+
298
+ Start GibRAM server with Docker:
299
+
300
+ ```bash
301
+ docker run -d \
302
+ --name gibram-server \
303
+ -p 6161:6161 \
304
+ -e EMBEDDING_DIM=1536 \
305
+ gibram:latest
306
+ ```
307
+
308
+ ## License
309
+
310
+ MIT
311
+
312
+ ## Version
313
+
314
+ v0.1.0 - Initial release with OpenAI extraction & embeddings
gibram-0.1.0/README.md ADDED
@@ -0,0 +1,276 @@
1
+ # GibRAM Python SDK v0.1.0
2
+
3
+ GraphRAG-style knowledge graph indexing with automatic entity extraction, relationship detection, and community discovery.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ cd sdk/python
9
+ pip install -e .
10
+ ```
11
+
12
+ ## Quick Start
13
+
14
+ ```python
15
+ from gibram import GibRAMIndexer
16
+
17
+ # Initialize indexer with OpenAI
18
+ indexer = GibRAMIndexer(
19
+ session_id="my-project",
20
+ llm_api_key="sk-...", # or set OPENAI_API_KEY env
21
+ )
22
+
23
+ # Index documents (automatic chunking, extraction, embedding)
24
+ stats = indexer.index_documents([
25
+ "Einstein was born in 1879 in Ulm, Germany.",
26
+ "He developed the theory of relativity in 1905.",
27
+ "Einstein received the Nobel Prize in Physics in 1921.",
28
+ ])
29
+
30
+ print(f"Indexed {stats.entities_extracted} entities in {stats.indexing_time_seconds:.2f}s")
31
+
32
+ # Query knowledge graph
33
+ result = indexer.query("Einstein's achievements", top_k=5)
34
+
35
+ for entity in result.entities:
36
+ print(f"{entity.title} ({entity.type}): {entity.description}")
37
+ ```
38
+
39
+ ## Configuration
40
+
41
+ ### Environment Variables
42
+
43
+ ```bash
44
+ export OPENAI_API_KEY="sk-..."
45
+ ```
46
+
47
+ ### Initialization Parameters
48
+
49
+ ```python
50
+ indexer = GibRAMIndexer(
51
+ # Required
52
+ session_id="unique-project-id",
53
+
54
+ # Server connection
55
+ host="localhost",
56
+ port=6161,
57
+
58
+ # LLM configuration
59
+ llm_provider="openai", # Only OpenAI supported in v0.1.0
60
+ llm_api_key="sk-...", # Falls back to the OPENAI_API_KEY env var if omitted
61
+ llm_model="gpt-4o", # GPT-4o recommended
62
+
63
+ # Embedding configuration
64
+ embedding_provider="openai",
65
+ embedding_model="text-embedding-3-small",
66
+ embedding_dimensions=1536, # Must match server config
67
+
68
+ # Chunking configuration
69
+ chunk_size=512, # Tokens per chunk
70
+ chunk_overlap=50, # Overlap between chunks
71
+
72
+ # Community detection
73
+ auto_detect_communities=True, # Auto-run after indexing
74
+ community_resolution=1.0, # Leiden algorithm resolution
75
+ )
76
+ ```
77
+
78
+ ## API Reference
79
+
80
+ ### GibRAMIndexer
81
+
82
+ Main class for indexing and querying.
83
+
84
+ #### `index_documents(documents, batch_size=10, show_progress=True) -> IndexStats`
85
+
86
+ Index documents into the knowledge graph.
87
+
88
+ **Arguments:**
89
+ - `documents`: List of strings or dicts `{"id": ..., "text": ..., "metadata": ...}`
90
+ - `batch_size`: Batch size for LLM/API calls (default: 10)
91
+ - `show_progress`: Show progress bar (default: True)
92
+
93
+ **Returns:** `IndexStats` with counts and timing
94
+
95
+ **Pipeline:**
96
+ 1. Chunk documents → TextUnits
97
+ 2. Extract entities & relationships (LLM)
98
+ 3. Generate embeddings
99
+ 4. Store in graph
100
+ 5. Link entities to text units
101
+ 6. Detect communities (if enabled)
102
+
103
+ **Example:**
104
+ ```python
105
+ stats = indexer.index_documents([
106
+ {"id": "doc1", "text": "...", "metadata": {"source": "wiki"}},
107
+ {"id": "doc2", "text": "..."},
108
+ ])
109
+ ```
110
+
111
+ #### `query(query, mode="local", top_k=10, include_entities=True, include_text_units=True, include_communities=False) -> QueryResult`
112
+
113
+ Query the knowledge graph.
114
+
115
+ **Arguments:**
116
+ - `query`: Natural language query
117
+ - `mode`: Query mode (v0.1.0 only supports "local")
118
+ - `top_k`: Number of results (default: 10)
119
+ - `include_entities`: Include entity results
120
+ - `include_text_units`: Include text unit results
121
+ - `include_communities`: Include community results
122
+
123
+ **Returns:** `QueryResult` with scored results
124
+
125
+ **Example:**
126
+ ```python
127
+ result = indexer.query("machine learning applications", top_k=5)
128
+
129
+ for entity in result.entities:
130
+ print(f"{entity.title}: {entity.score:.3f}")
131
+
132
+ for text_unit in result.text_units:
133
+ print(f"{text_unit.content[:100]}... (score: {text_unit.score:.3f})")
134
+ ```
135
+
136
+ #### `get_stats() -> IndexStats`
137
+
138
+ Get current indexing statistics.
139
+
140
+ #### `close()`
141
+
142
+ Close the connection to the server.
143
+
144
+ ### Types
145
+
146
+ #### `IndexStats`
147
+
148
+ ```python
149
+ @dataclass
150
+ class IndexStats:
151
+ documents_indexed: int = 0
152
+ text_units_created: int = 0
153
+ entities_extracted: int = 0
154
+ relationships_extracted: int = 0
155
+ communities_detected: int = 0
156
+ indexing_time_seconds: float = 0.0
157
+ ```
158
+
159
+ #### `QueryResult`
160
+
161
+ ```python
162
+ @dataclass
163
+ class QueryResult:
164
+ entities: List[ScoredEntity]
165
+ text_units: List[ScoredTextUnit]
166
+ communities: List[ScoredCommunity]
167
+ execution_time_ms: float
168
+ ```
169
+
170
+ #### `ScoredEntity`
171
+
172
+ ```python
173
+ @dataclass
174
+ class ScoredEntity:
175
+ id: int
176
+ title: str
177
+ type: str
178
+ description: str
179
+ score: float # Similarity score
180
+ ```
181
+
182
+ ### Exceptions
183
+
184
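+ All exceptions inherit from `GibRAMError`. Note that `ConnectionError` and `TimeoutError` are GibRAM-specific classes that share their names with Python builtins, so importing them from `gibram` shadows the builtins in the importing module: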
185
+
186
+ - `ConnectionError`: Server connection failed
187
+ - `TimeoutError`: Operation timed out
188
+ - `ProtocolError`: Protocol encoding/decoding error
189
+ - `ServerError`: Server returned an error
190
+ - `NotFoundError`: Resource not found
191
+ - `ValidationError`: Input validation failed
192
+ - `ExtractionError`: LLM extraction failed
193
+ - `EmbeddingError`: Embedding generation failed
194
+ - `ConfigurationError`: Invalid configuration
195
+
196
+ ## Advanced Usage
197
+
198
+ ### Custom Extractors
199
+
200
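+ Implement `BaseExtractor` for custom entity/relationship extraction (the example uses built-in generic annotations such as `tuple[...]` and `list[...]`, which require Python 3.9+; on Python 3.8 add `from __future__ import annotations` or use `typing.Tuple`/`typing.List`):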
201
+
202
+ ```python
203
+ from gibram.extractors import BaseExtractor
204
+ from gibram.types import ExtractedEntity, ExtractedRelationship
205
+
206
+ class MyExtractor(BaseExtractor):
207
+ def extract(self, text: str) -> tuple[list[ExtractedEntity], list[ExtractedRelationship]]:
208
+ # Your custom logic
209
+ entities = [...]
210
+ relationships = [...]
211
+ return entities, relationships
212
+
213
+ indexer = GibRAMIndexer(
214
+ session_id="custom",
215
+ extractor=MyExtractor(),
216
+ embedder=..., # Still need embedder
217
+ )
218
+ ```
219
+
220
+ ### Custom Embedders
221
+
222
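+ Implement `BaseEmbedder` for custom embeddings (the example uses built-in generic annotations such as `list[str]`, which require Python 3.9+; on Python 3.8 add `from __future__ import annotations` or use `typing.List`):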
223
+
224
+ ```python
225
+ from gibram.embedders import BaseEmbedder
226
+
227
+ class MyEmbedder(BaseEmbedder):
228
+ def embed(self, texts: list[str]) -> list[list[float]]:
229
+ # Your custom logic
230
+ return [[0.1, 0.2, ...], ...]
231
+
232
+ def embed_single(self, text: str) -> list[float]:
233
+ return self.embed([text])[0]
234
+
235
+ indexer = GibRAMIndexer(
236
+ session_id="custom",
237
+ embedder=MyEmbedder(),
238
+ )
239
+ ```
240
+
241
+ ### Context Manager
242
+
243
+ Use a context manager for automatic cleanup:
244
+
245
+ ```python
246
+ with GibRAMIndexer(session_id="project") as indexer:
247
+ stats = indexer.index_documents(documents)
248
+ result = indexer.query("some query")
249
+ # Connection automatically closed
250
+ ```
251
+
252
+ ## Requirements
253
+
254
+ - Python 3.8+
255
+ - GibRAM server running (Docker recommended)
256
+ - OpenAI API key (for extraction & embeddings)
257
+
258
+ ## Server Setup
259
+
260
+ Start GibRAM server with Docker:
261
+
262
+ ```bash
263
+ docker run -d \
264
+ --name gibram-server \
265
+ -p 6161:6161 \
266
+ -e EMBEDDING_DIM=1536 \
267
+ gibram:latest
268
+ ```
269
+
270
+ ## License
271
+
272
+ MIT
273
+
274
+ ## Version
275
+
276
+ v0.1.0 - Initial release with OpenAI extraction & embeddings
@@ -0,0 +1,51 @@
1
+ """GibRAM Python SDK - GraphRAG-style knowledge graph indexing."""
2
+
3
+ __version__ = "0.1.0"
4
+
5
+ from .indexer import GibRAMIndexer
6
+ from .types import (
7
+ IndexStats,
8
+ QueryResult,
9
+ ScoredEntity,
10
+ ScoredTextUnit,
11
+ ScoredCommunity,
12
+ ExtractedEntity,
13
+ ExtractedRelationship,
14
+ )
15
+ from .exceptions import (
16
+ GibRAMError,
17
+ ConnectionError,
18
+ TimeoutError,
19
+ ProtocolError,
20
+ ServerError,
21
+ NotFoundError,
22
+ ValidationError,
23
+ ExtractionError,
24
+ EmbeddingError,
25
+ ConfigurationError,
26
+ )
27
+
28
+ __all__ = [
29
+ # Main API
30
+ "GibRAMIndexer",
31
+ # Return types
32
+ "IndexStats",
33
+ "QueryResult",
34
+ "ScoredEntity",
35
+ "ScoredTextUnit",
36
+ "ScoredCommunity",
37
+ # For advanced users
38
+ "ExtractedEntity",
39
+ "ExtractedRelationship",
40
+ # Exceptions
41
+ "GibRAMError",
42
+ "ConnectionError",
43
+ "TimeoutError",
44
+ "ProtocolError",
45
+ "ServerError",
46
+ "NotFoundError",
47
+ "ValidationError",
48
+ "ExtractionError",
49
+ "EmbeddingError",
50
+ "ConfigurationError",
51
+ ]