maris 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. maris-0.1.0/LICENSE +21 -0
  2. maris-0.1.0/PKG-INFO +686 -0
  3. maris-0.1.0/README.md +637 -0
  4. maris-0.1.0/pyproject.toml +172 -0
  5. maris-0.1.0/setup.cfg +4 -0
  6. maris-0.1.0/src/maris/__init__.py +20 -0
  7. maris-0.1.0/src/maris/agents/__init__.py +17 -0
  8. maris-0.1.0/src/maris/agents/documentation_agent.py +545 -0
  9. maris-0.1.0/src/maris/agents/git_agent.py +404 -0
  10. maris-0.1.0/src/maris/agents/impact_analysis_agent.py +592 -0
  11. maris-0.1.0/src/maris/agents/indexing_agent.py +760 -0
  12. maris-0.1.0/src/maris/agents/orchestrator_agent.py +640 -0
  13. maris-0.1.0/src/maris/agents/qa_agent.py +468 -0
  14. maris-0.1.0/src/maris/cli/__init__.py +7 -0
  15. maris-0.1.0/src/maris/cli/main.py +870 -0
  16. maris-0.1.0/src/maris/config/__init__.py +7 -0
  17. maris-0.1.0/src/maris/config/settings.py +214 -0
  18. maris-0.1.0/src/maris/core/__init__.py +23 -0
  19. maris-0.1.0/src/maris/core/models.py +360 -0
  20. maris-0.1.0/src/maris/embeddings/__init__.py +7 -0
  21. maris-0.1.0/src/maris/embeddings/ollama_embeddings.py +185 -0
  22. maris-0.1.0/src/maris/indexing/__init__.py +11 -0
  23. maris-0.1.0/src/maris/indexing/java_parser.py +547 -0
  24. maris-0.1.0/src/maris/indexing/parser.py +203 -0
  25. maris-0.1.0/src/maris/indexing/parser_factory.py +260 -0
  26. maris-0.1.0/src/maris/indexing/python_parser.py +438 -0
  27. maris-0.1.0/src/maris/indexing/scala_parser.py +539 -0
  28. maris-0.1.0/src/maris/knowledge/__init__.py +8 -0
  29. maris-0.1.0/src/maris/knowledge/repository_knowledge_impl.py +260 -0
  30. maris-0.1.0/src/maris/knowledge/service.py +239 -0
  31. maris-0.1.0/src/maris/storage/__init__.py +8 -0
  32. maris-0.1.0/src/maris/storage/metadata_store.py +774 -0
  33. maris-0.1.0/src/maris/storage/vector_store.py +278 -0
  34. maris-0.1.0/src/maris/utils/__init__.py +7 -0
  35. maris-0.1.0/src/maris/utils/validation.py +283 -0
  36. maris-0.1.0/src/maris.egg-info/PKG-INFO +686 -0
  37. maris-0.1.0/src/maris.egg-info/SOURCES.txt +50 -0
  38. maris-0.1.0/src/maris.egg-info/dependency_links.txt +1 -0
  39. maris-0.1.0/src/maris.egg-info/entry_points.txt +2 -0
  40. maris-0.1.0/src/maris.egg-info/requires.txt +28 -0
  41. maris-0.1.0/src/maris.egg-info/top_level.txt +1 -0
  42. maris-0.1.0/tests/test_dependency_extraction.py +236 -0
  43. maris-0.1.0/tests/test_documentation_agent.py +387 -0
  44. maris-0.1.0/tests/test_git_agent.py +485 -0
  45. maris-0.1.0/tests/test_impact_analysis_agent.py +249 -0
  46. maris-0.1.0/tests/test_indexing_agent.py +703 -0
  47. maris-0.1.0/tests/test_java_parser.py +521 -0
  48. maris-0.1.0/tests/test_orchestrator_agent.py +726 -0
  49. maris-0.1.0/tests/test_parser_factory.py +312 -0
  50. maris-0.1.0/tests/test_python_parser.py +284 -0
  51. maris-0.1.0/tests/test_qa_agent.py +560 -0
  52. maris-0.1.0/tests/test_scala_parser.py +465 -0
maris-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Rohin
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
maris-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,686 @@
1
+ Metadata-Version: 2.4
2
+ Name: maris
3
+ Version: 0.1.0
4
+ Summary: Local Multi-Agent Repository Intelligence System
5
+ Author-email: Rohin Patel <rohin.patel@outlook.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/rohinp/maris
8
+ Project-URL: Documentation, https://github.com/rohinp/maris/docs
9
+ Project-URL: Repository, https://github.com/rohinp/maris
10
+ Project-URL: Issues, https://github.com/rohinp/maris/issues
11
+ Keywords: repository,intelligence,llm,code-analysis,local-first
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Requires-Python: >=3.11
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: tree-sitter>=0.21.0
23
+ Requires-Dist: duckdb>=1.0.0
24
+ Requires-Dist: lancedb>=0.5.0
25
+ Requires-Dist: pyarrow>=17.0.0
26
+ Requires-Dist: langchain>=0.1.0
27
+ Requires-Dist: langchain-community>=0.0.20
28
+ Requires-Dist: langgraph>=0.0.20
29
+ Requires-Dist: ollama>=0.1.0
30
+ Requires-Dist: pydantic>=2.5.0
31
+ Requires-Dist: python-dotenv>=1.0.0
32
+ Requires-Dist: watchdog>=4.0.0
33
+ Requires-Dist: gitpython>=3.1.0
34
+ Requires-Dist: rich>=13.7.0
35
+ Requires-Dist: click>=8.1.0
36
+ Provides-Extra: dev
37
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
38
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
39
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
40
+ Requires-Dist: black>=24.0.0; extra == "dev"
41
+ Requires-Dist: ruff>=0.2.0; extra == "dev"
42
+ Requires-Dist: mypy>=1.8.0; extra == "dev"
43
+ Requires-Dist: pre-commit>=3.6.0; extra == "dev"
44
+ Provides-Extra: docs
45
+ Requires-Dist: mkdocs>=1.5.0; extra == "docs"
46
+ Requires-Dist: mkdocs-material>=9.5.0; extra == "docs"
47
+ Requires-Dist: mkdocstrings[python]>=0.24.0; extra == "docs"
48
+ Dynamic: license-file
49
+
50
+ # Local Multi-Agent Repository Intelligence System
51
+
52
+ ## Vision
53
+
54
+ Build a fully local, privacy-first repository intelligence platform that helps developers understand, navigate, document, analyze, and reason about source code.
55
+
56
+ The goal is **not** to compete with cloud coding assistants such as Claude Code, Cursor, GitHub Copilot, or OpenAI Codex.
57
+
58
+ The system will:
59
+
60
+ * Run locally
61
+ * Use local LLMs
62
+ * Never require source code to leave the machine
63
+ * Focus on understanding rather than code generation
64
+ * Be language-aware through AST parsing
65
+ * Maintain a continuously updated repository knowledge graph
66
+ * Support multiple specialized agents
67
+
68
+ The primary objective is to become a "repository expert" capable of answering questions, generating documentation, explaining architecture, performing impact analysis, and understanding code evolution over time.
69
+
70
+ ---
71
+
72
+ # Core Principles
73
+
74
+ ## 1. Retrieval First
75
+
76
+ The quality of answers depends on retrieval quality.
77
+
78
+ The system should prioritize:
79
+
80
+ * AST-aware indexing
81
+ * Symbol-aware retrieval
82
+ * Dependency-aware retrieval
83
+
84
+ over generic vector similarity search.
85
+
86
+ ---
87
+
88
+ ## 2. Code is a Graph
89
+
90
+ A repository is not a collection of files.
91
+
92
+ A repository is a graph of:
93
+
94
+ * Packages
95
+ * Modules
96
+ * Classes
97
+ * Traits
98
+ * Interfaces
99
+ * Functions
100
+ * Methods
101
+ * Dependencies
102
+ * Imports
103
+ * Call relationships
104
+
105
+ The system should maintain this graph as a first-class entity.
106
+
107
+ ---
108
+
109
+ ## 3. Local First
110
+
111
+ All processing should happen locally:
112
+
113
+ * Parsing
114
+ * Embedding generation
115
+ * Retrieval
116
+ * Reasoning
117
+
118
+ No external APIs are required.
119
+
120
+ ---
121
+
122
+ ## 4. Specialized Agents
123
+
124
+ Each agent should have a single responsibility.
125
+
126
+ Avoid creating one large autonomous agent.
127
+
128
+ Instead create multiple focused agents sharing a common knowledge layer.
129
+
130
+ ---
131
+
132
+ # High-Level Architecture
133
+
134
+ ```text
135
+ Repository
136
+
137
+ │
138
+ │
139
+ ▼
140
+
141
+ Indexing Agent
142
+
143
+ │
144
+ │
145
+ ▼
146
+
147
+ Repository Knowledge Layer
148
+ │
149
+ ├── Symbol Store
150
+ ├── Dependency Graph
151
+ ├── Vector Store
152
+ ├── Commit History
153
+ └── Metadata
154
+
155
+ │
156
+ │
157
+ ▼
158
+
159
+ Agents
160
+ │
161
+ ├── Documentation Agent ✅
162
+ ├── Q&A Agent ✅
163
+ ├── Git Agent ✅
164
+ ├── Impact Analysis Agent ✅
165
+ ├── Git Archaeology Agent (Planned)
166
+ └── Future Agents
167
+ ```
168
+
169
+ ---
170
+
171
+ # Technology Choices
172
+
173
+ ## Parsing
174
+
175
+ Use Tree-sitter.
176
+
177
+ Reason:
178
+
179
+ * Mature ecosystem
180
+ * Multi-language support
181
+ * Incremental parsing
182
+ * Existing grammars
183
+
184
+ Supported languages for MVP:
185
+
186
+ * Scala
187
+ * Java
188
+ * Python
189
+
190
+ Future:
191
+
192
+ * Go
193
+ * Rust
194
+ * Kotlin
195
+ * C++
196
+ * C#
197
+ * TypeScript
198
+
199
+ ---
200
+
201
+ ## Local LLM Runtime
202
+
203
+ Use Ollama.
204
+
205
+ Candidate models:
206
+
207
+ ### MVP
208
+
209
+ * Qwen3 8B
210
+ * Gemma 3 12B
211
+
212
+ ### Recommended
213
+
214
+ * Qwen3 32B
215
+
216
+ ### Future
217
+
218
+ * Qwen3 72B
219
+ * DeepSeek R1 Distill
220
+
221
+ ---
222
+
223
+ ## Embeddings
224
+
225
+ Candidate models:
226
+
227
+ * nomic-embed-text
228
+ * bge-large
229
+ * gte-large
230
+
231
+ Embeddings should only assist retrieval.
232
+
233
+ They must not become the primary retrieval mechanism.
234
+
235
+ ---
236
+
237
+ ## Agent Orchestration
238
+
239
+ Use LangGraph.
240
+
241
+ Reason:
242
+
243
+ * Explicit workflows
244
+ * State management
245
+ * Tool orchestration
246
+ * Easy future expansion
247
+
248
+ Avoid autonomous agent loops.
249
+
250
+ Prefer deterministic workflows.
251
+
252
+ ---
253
+
254
+ ## Storage
255
+
256
+ ### Metadata Store
257
+
258
+ DuckDB
259
+
260
+ Stores:
261
+
262
+ * symbols
263
+ * files
264
+ * relationships
265
+ * commits
266
+ * documentation
267
+
268
+ ---
269
+
270
+ ### Vector Store
271
+
272
+ LanceDB
273
+
274
+ Stores:
275
+
276
+ * embeddings
277
+ * semantic search index
278
+
279
+ Alternative:
280
+
281
+ * Qdrant
282
+
283
+ ---
284
+
285
+ ### Future Graph Database
286
+
287
+ Optional.
288
+
289
+ Candidates:
290
+
291
+ * KuzuDB
292
+ * Neo4j
293
+
294
+ Do not introduce graph databases during MVP.
295
+
296
+ ---
297
+
298
+ # Repository Knowledge Layer
299
+
300
+ This is the most important component.
301
+
302
+ All agents interact through this layer.
303
+
304
+ Responsibilities:
305
+
306
+ * Symbol lookup
307
+ * Dependency traversal
308
+ * Semantic retrieval
309
+ * Impact analysis support
310
+ * Commit history lookup
311
+
312
+ Example interface:
313
+
314
+ ```scala
315
+ trait RepositoryKnowledgeService {
316
+
317
+ def findSymbol(name: String)
318
+
319
+ def findCallers(symbol: Symbol)
320
+
321
+ def findCallees(symbol: Symbol)
322
+
323
+ def retrieveContext(question: String)
324
+
325
+ def impactedSymbols(symbol: Symbol)
326
+
327
+ }
328
+ ```
329
+
330
+ This layer becomes the foundation of the entire platform.
331
+
332
+ ---
333
+
334
+ # MVP
335
+
336
+ ## Agent 1: Repository Indexing Agent
337
+
338
+ ### Responsibilities
339
+
340
+ Convert source code into structured knowledge.
341
+
342
+ ### Workflow
343
+
344
+ Repository
345
+
346
+ ↓
347
+
348
+ Tree-sitter AST
349
+
350
+ ↓
351
+
352
+ Symbol Extraction
353
+
354
+ ↓
355
+
356
+ Dependency Extraction
357
+
358
+ ↓
359
+
360
+ Embedding Generation
361
+
362
+ ↓
363
+
364
+ Storage
365
+
366
+ ### Extracted Metadata
367
+
368
+ For every symbol:
369
+
370
+ ```json
371
+ {
372
+ "symbol": "GraphRunner.retryExecuteNode",
373
+ "type": "method",
374
+ "file": "GraphRunner.scala",
375
+ "language": "scala",
376
+ "calls": [
377
+ "attemptExecuteNode"
378
+ ]
379
+ }
380
+ ```
381
+
382
+ ### Incremental Updates
383
+
384
+ ✅ **Implemented via Git Agent**
385
+
386
+ The system now includes a Git Agent that:
387
+
388
+ * Detects changes via `git diff`
389
+ * Tracks the last indexed commit
390
+ * Re-indexes only changed files
391
+ * Supports incremental indexing via CLI: `maris index --incremental`
392
+
393
+ This dramatically improves indexing performance for large repositories.
394
+
395
+ See [Git Agent Documentation](docs/GIT_AGENT.md) for details.
396
+
397
+ ---
398
+
399
+ ## Agent 2: Documentation Agent
400
+
401
+ ### Responsibilities
402
+
403
+ Generate repository documentation.
404
+
405
+ ### Output
406
+
407
+ * Architecture overview
408
+ * Component documentation
409
+ * Module descriptions
410
+ * Dependency diagrams
411
+ * Data flow descriptions
412
+
413
+ ### Important Rule
414
+
415
+ Never generate documentation directly from raw files.
416
+
417
+ Always use indexed symbols and repository graph data.
418
+
419
+ ---
420
+
421
+ ## Agent 3: Repository Q&A Agent
422
+
423
+ ### Responsibilities
424
+
425
+ Answer questions about code.
426
+
427
+ Examples:
428
+
429
+ * Explain GraphRunner
430
+ * How does retry work?
431
+ * Where is reducer used?
432
+ * What happens when training starts?
433
+
434
+ ### Workflow
435
+
436
+ Question
437
+
438
+ ↓
439
+
440
+ Retrieve Symbols
441
+
442
+ ↓
443
+
444
+ Expand Dependencies
445
+
446
+ ↓
447
+
448
+ Build Context
449
+
450
+ ↓
451
+
452
+ LLM Reasoning
453
+
454
+ ↓
455
+
456
+ Answer
457
+
458
+ ### Goal
459
+
460
+ Context should consist of relevant symbols.
461
+
462
+ Not arbitrary chunks.
463
+
464
+ ---
465
+
466
+ # Roadmap
467
+
468
+ ## Agent 4: Git Agent
469
+
470
+ ✅ **Implemented** (June 2026)
471
+
472
+ Purpose:
473
+
474
+ Track repository changes and enable incremental indexing.
475
+
476
+ Capabilities:
477
+
478
+ * Detect changes since last indexing
479
+ * Categorize changes (added/modified/deleted/renamed)
480
+ * Enable efficient incremental re-indexing
481
+ * Track commit history
482
+
483
+ See [Git Agent Documentation](docs/GIT_AGENT.md) for details.
484
+
485
+ ---
486
+
487
+ ## Agent 5: Impact Analysis Agent
488
+
489
+ ✅ **Implemented** (June 2026)
490
+
491
+ Purpose:
492
+
493
+ Analyze the impact of code changes and help developers understand what will be affected by modifications.
494
+
495
+ Capabilities:
496
+
497
+ * **Dependency analysis**: Find direct and indirect callers, callees, and affected files
498
+ * **Test discovery**: Identify tests covering symbols and suggest missing scenarios
499
+ * **Edge case detection**: Detect missing null checks, error handling, and boundary conditions
500
+ * **Breaking change detection**: Identify potential breaking changes and affected callers
501
+ * **Recommendations**: Generate actionable recommendations based on analysis
502
+
503
+ Integration:
504
+
505
+ * **Auto-routing**: Orchestrator automatically routes impact-related questions (keywords: "impact", "affect", "break", "edge case", "test coverage")
506
+ * **Explicit CLI**:
507
+ - `maris impact analyze --symbol "SymbolName"`
508
+ - `maris impact edge-cases --file "path/to/file.py"`
509
+ - `maris impact tests --symbol "SymbolName"`
510
+ - `maris impact breaking-changes --symbol "SymbolName"`
511
+ * **Implicit via ask**: `maris ask "What will be affected if I change X?"`
512
+
513
+ Example:
514
+
515
+ ```bash
516
+ # Auto-routed to Impact Analysis Agent
517
+ maris ask "What will be affected if I change GitAgent?"
518
+
519
+ # Explicit impact analysis
520
+ maris impact analyze --symbol "GitAgent.detect_changes"
521
+ maris impact edge-cases --file "src/maris/agents/git_agent.py"
522
+ maris impact tests --symbol "QAAgent.answer_question"
523
+ ```
524
+
525
+ See [Impact Analysis Agent Documentation](docs/IMPACT_ANALYSIS_AGENT.md) for details.
526
+
527
+ ---
528
+
529
+ ## Agent 6: Git Archaeology Agent
530
+
531
+ Purpose:
532
+
533
+ Understand historical code evolution.
534
+
535
+ Questions:
536
+
537
+ * When was this bug introduced?
538
+ * Who changed this logic?
539
+ * Why was this method added?
540
+
541
+ Data Sources:
542
+
543
+ * git log
544
+ * git blame
545
+ * commit metadata
546
+
547
+ Capabilities:
548
+
549
+ * commit timeline generation
550
+ * code evolution summaries
551
+ * regression identification
552
+
553
+ ---
554
+
555
+ ## Agent 7: Test Suggestion Agent
556
+
557
+ Purpose:
558
+
559
+ Suggest tests based on modifications.
560
+
561
+ Inputs:
562
+
563
+ * changed symbols
564
+ * dependency graph
565
+ * historical bugs
566
+
567
+ Outputs:
568
+
569
+ * missing tests
570
+ * edge cases
571
+ * regression scenarios
572
+
573
+ ---
574
+
575
+ ## Agent 8: Architecture Evolution Agent
576
+
577
+ Purpose:
578
+
579
+ Track architecture changes over time.
580
+
581
+ Capabilities:
582
+
583
+ * detect coupling growth
584
+ * detect module boundaries
585
+ * identify hotspots
586
+ * detect architectural drift
587
+
588
+ ---
589
+
590
+ # Retrieval Strategy
591
+
592
+ ## Do Not
593
+
594
+ Generic chunking:
595
+
596
+ ```text
597
+ 1000 token chunks
598
+ ```
599
+
600
+ This loses structure.
601
+
602
+ ---
603
+
604
+ ## Preferred
605
+
606
+ AST-based symbol chunking.
607
+
608
+ Example:
609
+
610
+ ```text
611
+ Package
612
+
613
+ ├── Class
614
+
615
+ ├── Method
616
+
617
+ ├── Method
618
+
619
+ └── Method
620
+ ```
621
+
622
+ Each symbol becomes a retrievable unit.
623
+
624
+ ---
625
+
626
+ ## Retrieval Pipeline
627
+
628
+ Question
629
+
630
+ ↓
631
+
632
+ Vector Search
633
+
634
+ ↓
635
+
636
+ Symbol Expansion
637
+
638
+ ↓
639
+
640
+ Dependency Expansion
641
+
642
+ ↓
643
+
644
+ Context Assembly
645
+
646
+ ↓
647
+
648
+ Reasoning
649
+
650
+ This combines semantic search with graph traversal.
651
+
652
+ ---
653
+
654
+ # Non Goals
655
+
656
+ The system is NOT intended to:
657
+
658
+ * Generate PRs
659
+ * Automatically modify code
660
+ * Replace developers
661
+ * Act autonomously
662
+ * Execute arbitrary repository changes
663
+
664
+ The system is designed to help developers understand software.
665
+
666
+ ---
667
+
668
+ # Success Criteria
669
+
670
+ MVP is successful when:
671
+
672
+ 1. ✅ Repository indexing works incrementally (Git Agent)
673
+ 2. ✅ Symbols can be queried accurately
674
+ 3. ✅ Documentation can be generated automatically
675
+ 4. ✅ Q&A answers are grounded in repository knowledge
676
+ 5. ✅ Entire workflow runs locally
677
+ 6. ✅ No external API dependencies are required
678
+
679
+ **MVP Complete!** All success criteria have been met.
680
+
681
+ ---
682
+
683
+ # Long-Term Goal
684
+
685
+ Become a local repository intelligence platform capable of understanding large codebases as well as experienced maintainers do, while remaining privacy-first, language-aware, and fully developer-controlled.
686
+