elastro-client 1.3.32__tar.gz → 1.3.34__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. {elastro_client-1.3.32/elastro_client.egg-info → elastro_client-1.3.34}/PKG-INFO +6 -1
  2. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/__init__.py +1 -1
  3. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/cli.py +2 -0
  4. elastro_client-1.3.34/elastro/cli/commands/rag.py +72 -0
  5. elastro_client-1.3.34/elastro/core/rag/ast_parser.py +260 -0
  6. elastro_client-1.3.34/elastro/core/rag/ingestor.py +189 -0
  7. {elastro_client-1.3.32 → elastro_client-1.3.34/elastro_client.egg-info}/PKG-INFO +6 -1
  8. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro_client.egg-info/SOURCES.txt +3 -0
  9. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro_client.egg-info/requires.txt +5 -0
  10. {elastro_client-1.3.32 → elastro_client-1.3.34}/pyproject.toml +6 -1
  11. {elastro_client-1.3.32 → elastro_client-1.3.34}/.coveragerc +0 -0
  12. {elastro_client-1.3.32 → elastro_client-1.3.34}/LICENSE +0 -0
  13. {elastro_client-1.3.32 → elastro_client-1.3.34}/MANIFEST.in +0 -0
  14. {elastro_client-1.3.32 → elastro_client-1.3.34}/README.md +0 -0
  15. {elastro_client-1.3.32 → elastro_client-1.3.34}/docs/advanced_features.md +0 -0
  16. {elastro_client-1.3.32 → elastro_client-1.3.34}/docs/api_reference.md +0 -0
  17. {elastro_client-1.3.32 → elastro_client-1.3.34}/docs/cli_usage.md +0 -0
  18. {elastro_client-1.3.32 → elastro_client-1.3.34}/docs/commands_reference.md +0 -0
  19. {elastro_client-1.3.32 → elastro_client-1.3.34}/docs/getting_started.md +0 -0
  20. {elastro_client-1.3.32 → elastro_client-1.3.34}/docs/roadmap.md +0 -0
  21. {elastro_client-1.3.32 → elastro_client-1.3.34}/docs/troubleshooting.md +0 -0
  22. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/advanced/__init__.py +0 -0
  23. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/advanced/aggregations.py +0 -0
  24. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/advanced/query_builder.py +0 -0
  25. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/advanced/scroll.py +0 -0
  26. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/__init__.py +0 -0
  27. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/art.py +0 -0
  28. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/commands/__init__.py +0 -0
  29. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/commands/cluster.py +0 -0
  30. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/commands/config.py +0 -0
  31. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/commands/datastream.py +0 -0
  32. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/commands/document.py +0 -0
  33. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/commands/gui.py +0 -0
  34. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/commands/ilm.py +0 -0
  35. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/commands/index.py +0 -0
  36. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/commands/index_recipes.py +0 -0
  37. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/commands/ingest.py +0 -0
  38. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/commands/ml.py +0 -0
  39. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/commands/painless_commands.py +0 -0
  40. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/commands/script.py +0 -0
  41. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/commands/security.py +0 -0
  42. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/commands/snapshot.py +0 -0
  43. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/commands/tasks.py +0 -0
  44. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/commands/template.py +0 -0
  45. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/commands/utils.py +0 -0
  46. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/completion.py +0 -0
  47. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/cli/output.py +0 -0
  48. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/config/__init__.py +0 -0
  49. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/config/defaults.py +0 -0
  50. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/config/loader.py +0 -0
  51. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/core/__init__.py +0 -0
  52. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/core/client.py +0 -0
  53. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/core/datastream.py +0 -0
  54. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/core/document.py +0 -0
  55. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/core/document_bulk.py +0 -0
  56. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/core/errors.py +0 -0
  57. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/core/ilm.py +0 -0
  58. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/core/index.py +0 -0
  59. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/core/logger.py +0 -0
  60. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/core/query_builder.py +0 -0
  61. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/core/snapshot.py +0 -0
  62. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/core/validation.py +0 -0
  63. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/gui/assets/index-CeUjjtn-.css +0 -0
  64. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/gui/assets/index-DNdGuJvV.js +0 -0
  65. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/gui/elastro.svg +0 -0
  66. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/gui/favicon.ico +0 -0
  67. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/gui/index.html +0 -0
  68. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/gui/vite.svg +0 -0
  69. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/py.typed +0 -0
  70. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/server.py +0 -0
  71. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/utils/__init__.py +0 -0
  72. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/utils/aliases.py +0 -0
  73. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/utils/health.py +0 -0
  74. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/utils/snapshots.py +0 -0
  75. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro/utils/templates.py +0 -0
  76. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro_client.egg-info/dependency_links.txt +0 -0
  77. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro_client.egg-info/entry_points.txt +0 -0
  78. {elastro_client-1.3.32 → elastro_client-1.3.34}/elastro_client.egg-info/top_level.txt +0 -0
  79. {elastro_client-1.3.32 → elastro_client-1.3.34}/examples/client.py +0 -0
  80. {elastro_client-1.3.32 → elastro_client-1.3.34}/examples/config_usage.py +0 -0
  81. {elastro_client-1.3.32 → elastro_client-1.3.34}/examples/datastreams.py +0 -0
  82. {elastro_client-1.3.32 → elastro_client-1.3.34}/examples/debug_connection.py +0 -0
  83. {elastro_client-1.3.32 → elastro_client-1.3.34}/examples/document_operations.py +0 -0
  84. {elastro_client-1.3.32 → elastro_client-1.3.34}/examples/index_management.py +0 -0
  85. {elastro_client-1.3.32 → elastro_client-1.3.34}/examples/search.py +0 -0
  86. {elastro_client-1.3.32 → elastro_client-1.3.34}/pytest.ini +0 -0
  87. {elastro_client-1.3.32 → elastro_client-1.3.34}/requirements.txt +0 -0
  88. {elastro_client-1.3.32 → elastro_client-1.3.34}/setup.cfg +0 -0
  89. {elastro_client-1.3.32 → elastro_client-1.3.34}/setup.py +0 -0
  90. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/__init__.py +0 -0
  91. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/conftest.py +0 -0
  92. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/fixtures/__init__.py +0 -0
  93. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/fixtures/datastream_fixtures.py +0 -0
  94. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/fixtures/document_fixtures.py +0 -0
  95. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/fixtures/index_fixtures.py +0 -0
  96. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/integration/__init__.py +0 -0
  97. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/integration/test_aggregations_integration.py +0 -0
  98. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/integration/test_client_integration.py +0 -0
  99. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/integration/test_datastream_integration.py +0 -0
  100. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/integration/test_docs_quickstart.py +0 -0
  101. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/integration/test_document_integration.py +0 -0
  102. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/integration/test_index_integration.py +0 -0
  103. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/integration/test_query_builder_integration.py +0 -0
  104. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/integration/test_scroll_integration.py +0 -0
  105. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/integration/test_workflow_integration.py +0 -0
  106. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/manual/test_es.py +0 -0
  107. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/__init__.py +0 -0
  108. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/advanced/__init__.py +0 -0
  109. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/advanced/test_aggregations.py +0 -0
  110. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/advanced/test_query_builder.py +0 -0
  111. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/advanced/test_scroll.py +0 -0
  112. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/config/__init__.py +0 -0
  113. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/config/test_defaults.py +0 -0
  114. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/config/test_loader.py +0 -0
  115. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/core/__init__.py +0 -0
  116. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/core/test_client.py +0 -0
  117. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/core/test_datastream.py +0 -0
  118. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/core/test_document.py +0 -0
  119. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/core/test_document_bulk.py +0 -0
  120. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/core/test_errors.py +0 -0
  121. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/core/test_index.py +0 -0
  122. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/core/test_validation.py +0 -0
  123. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/utils/__init__.py +0 -0
  124. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/utils/test_aliases.py +0 -0
  125. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/utils/test_health.py +0 -0
  126. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/utils/test_snapshots.py +0 -0
  127. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/unit/utils/test_templates.py +0 -0
  128. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/verify_api_actions.py +0 -0
  129. {elastro_client-1.3.32 → elastro_client-1.3.34}/tests/verify_cli_e2e.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: elastro-client
3
- Version: 1.3.32
3
+ Version: 1.3.34
4
4
  Summary: A comprehensive Python library for Elasticsearch management with both programmatic and CLI interfaces
5
5
  Author: Austin Jorgensen
6
6
  License-Expression: MIT
@@ -29,6 +29,11 @@ Requires-Dist: rich>=10.0.0
29
29
  Requires-Dist: rich-click>=1.7.0
30
30
  Requires-Dist: fastapi>=0.111.0
31
31
  Requires-Dist: uvicorn>=0.30.0
32
+ Requires-Dist: tree-sitter>=0.23.0
33
+ Requires-Dist: tree-sitter-python>=0.23.0
34
+ Requires-Dist: tree-sitter-go>=0.23.0
35
+ Requires-Dist: tree-sitter-javascript>=0.23.0
36
+ Requires-Dist: tree-sitter-typescript>=0.23.0
32
37
  Provides-Extra: test
33
38
  Requires-Dist: pytest>=7.0.0; extra == "test"
34
39
  Requires-Dist: pytest-cov>=3.0.0; extra == "test"
@@ -4,7 +4,7 @@ Elasticsearch Management Module.
4
4
  A module for managing Elasticsearch operations within a pipeline process.
5
5
  """
6
6
 
7
- __version__ = "1.3.32"
7
+ __version__ = "1.3.34"
8
8
 
9
9
  # Core component imports
10
10
  from elastro.core.client import ElasticsearchClient
@@ -244,6 +244,7 @@ utils.add_command(utils_templates)
244
244
  utils.add_command(aliases)
245
245
 
246
246
  from elastro.cli.commands.gui import gui
247
+ from elastro.cli.commands.rag import rag_group
247
248
 
248
249
  # Register Top-Level Groups
249
250
  cli.add_command(template_group)
@@ -257,6 +258,7 @@ cli.add_command(ml_group)
257
258
  cli.add_command(script_group)
258
259
  cli.add_command(painless_group)
259
260
  cli.add_command(gui)
261
+ cli.add_command(rag_group)
260
262
 
261
263
 
262
264
  def main() -> None:
@@ -0,0 +1,72 @@
1
+ """
2
+ Graph RAG Commands for Code Flow Mapping.
3
+ """
4
+
5
+ import rich_click as click
6
+ import os
7
+ from typing import Optional
8
+ from elastro.core.client import ElasticsearchClient
9
+ from elastro.core.rag.ingestor import GraphRAGManager
10
+ from elastro.cli.output import format_output
11
+ from rich.console import Console
12
+
13
+ console = Console()
14
+
15
+
16
@click.group("rag")
def rag_group() -> None:
    """
    Manage Graph RAG (Retrieval Augmented Generation) capabilities.

    Includes native Code Flow mapping, AST extraction for Python, Go, TypeScript, and Vue,
    and direct injection into local Elasticsearch for Agentic Codebase Memory.
    """
    # Pure namespace: the group does no work itself; sub-commands such as
    # `ingest` attach to it through @rag_group.command(...).
    return None
25
+
26
+
27
@rag_group.command("ingest")
@click.argument(
    "repo_path", type=click.Path(exists=True, file_okay=False, dir_okay=True)
)
@click.option(
    "--index",
    "-i",
    type=str,
    default="fremen_codebase_rag",
    help="Target Elasticsearch index name (default: fremen_codebase_rag)",
)
@click.pass_obj
def ingest_repo(client: ElasticsearchClient, repo_path: str, index: str) -> None:
    """
    Ingest a repository with AST Code Flow Mapping.

    This command scans your codebase, utilizes Tree-sitter polyglot AST parsers
    to extract 'functions_defined' and 'functions_called', and uses the
    Elasticsearch Bulk API to stripe the Graph RAG data perfectly into an index.

    Example:
    ```bash
    elastro rag ingest /path/to/my/go-repo
    ```
    """
    # Imported locally so the fix does not depend on the module's import block.
    from rich.markup import escape

    # Everything interpolated into Rich markup is escaped: directory names,
    # index names and especially Elasticsearch error messages routinely contain
    # "[...]" which Rich would otherwise interpret as (possibly invalid) tags.
    repo_name = os.path.basename(os.path.abspath(repo_path))
    console.print(
        f"[bold cyan]🔍 Initializing Graph RAG AST Parsing for '{escape(repo_name)}'[/bold cyan]"
    )

    manager = GraphRAGManager(client, index)

    try:
        # Index scaffolding is handled internally by GraphRAGManager.
        success_count = manager.ingest_repository(repo_path)
    except Exception as e:
        console.print(
            f"[bold red]❌ RAG Ingestion Failed:[/bold red] {escape(str(e))}"
        )
        # The `exit` builtin is injected by the `site` module and is not
        # guaranteed to exist (python -S, frozen apps); SystemExit always is.
        raise SystemExit(1)

    # The bulk helper counts indexed documents, i.e. one per semantic chunk,
    # not one per source file.
    console.print(
        f"[bold green]✅ Success![/bold green] Ingested and mapped Code Flows for [bold]{success_count}[/bold] semantic chunks."
    )
    console.print(
        f"[dim]The AST Graph RAG context is now active in index '{escape(index)}'.[/dim]"
    )
@@ -0,0 +1,260 @@
1
+ """
2
+ Abstract Syntax Tree (AST) parser utilizing tree-sitter for Graph RAG Code Flow Mapping.
3
+ Supports polyglot analysis of Python, Go, JavaScript, TypeScript, and Vue.
4
+ Implements Semantic Chunking and Signature Extraction.
5
+ """
6
+
7
+ import os
8
+ import tree_sitter # type: ignore
9
+ from typing import Dict, List, Set, Any
10
+
11
+
12
class ASTParser:
    """
    Polyglot tree-sitter front end for Graph RAG Code Flow Mapping.

    For each supported source file the parser emits "semantic chunks"
    (functions, methods, classes/types). Every chunk is a dict with the keys
    ``chunk_type``, ``name``, ``content``, ``functions_defined`` and
    ``functions_called``. Files in an unsupported language, or whose grammar
    could not be loaded, are returned as a single ``file`` chunk.
    """

    # Language key -> (grammar package, factory returning the grammar capsule).
    _GRAMMARS: Dict[str, Any] = {
        "python": ("tree_sitter_python", "language"),
        "go": ("tree_sitter_go", "language"),
        "javascript": ("tree_sitter_javascript", "language"),
        "typescript": ("tree_sitter_typescript", "language_typescript"),
        # JSX syntax is only understood by the dedicated TSX grammar.
        "tsx": ("tree_sitter_typescript", "language_tsx"),
        # Only the <script> section of a Vue SFC is handed to this grammar.
        "vue": ("tree_sitter_typescript", "language_typescript"),
    }

    _EXTENSION_LANGUAGES: Dict[str, str] = {
        ".py": "python",
        ".go": "go",
        ".js": "javascript",
        ".jsx": "javascript",
        ".ts": "typescript",
        ".tsx": "tsx",
        ".vue": "vue",
    }

    _JS_FAMILY = frozenset({"javascript", "typescript", "tsx", "vue"})

    # Node type -> chunk_type, per grammar family.
    _PYTHON_CHUNKS: Dict[str, str] = {
        "function_definition": "function",
        "class_definition": "class",
    }
    _GO_CHUNKS: Dict[str, str] = {
        "function_declaration": "function",
        "method_declaration": "function",
        "type_declaration": "class",
    }
    _JS_CHUNKS: Dict[str, str] = {
        "function_declaration": "function",
        "method_definition": "function",
        "class_declaration": "class",
    }

    # The Python grammar names call nodes "call"; Go/JS/TS use "call_expression".
    _CALL_NODE_TYPES = frozenset({"call", "call_expression"})

    # Dotted-access node type -> (field holding the receiver, field holding the member).
    _MEMBER_FIELDS: Dict[str, Any] = {
        "attribute": ("object", "attribute"),  # Python
        "member_expression": ("object", "property"),  # JavaScript / TypeScript
        "selector_expression": ("operand", "field"),  # Go
    }

    # Call targets too generic to be useful edges in the call graph.
    _IGNORED_CALLS = frozenset(
        {
            "print",
            "len",
            "range",
            "str",
            "int",
            "list",
            "dict",
            "set",
            "super",
            "append",
            "fmt.Println",
            "console.log",
            "console.error",
            "require",
            "getattr",
            "setattr",
            "hasattr",
            "isinstance",
            "type",
        }
    )

    def __init__(self) -> None:
        self.parsers: "Dict[str, tree_sitter.Parser]" = {}
        self.langs: Dict[str, Any] = {}
        self._initialize_parsers()

    def _initialize_parsers(self) -> None:
        """
        Loads natively compiled tree-sitter language capsules.

        Each grammar is loaded independently so that one missing grammar
        package only disables that language instead of every parser.
        """
        import importlib

        try:
            from tree_sitter import Language, Parser
        except ImportError as e:
            # Degrade gracefully if tree-sitter is missing during testing/CI
            print(f"Graph RAG AST Parser initialization warning: {e}")
            return

        unavailable: Set[str] = set()
        for lang_name, (module_name, factory_name) in self._GRAMMARS.items():
            if module_name in unavailable:
                continue
            try:
                grammar = importlib.import_module(module_name)
            except ImportError as e:
                unavailable.add(module_name)  # warn once per package
                print(f"Graph RAG AST Parser initialization warning: {e}")
                continue

            # Grammar packages expose their language as a PyCapsule pointer.
            lang_obj = Language(getattr(grammar, factory_name)())
            parser = Parser()
            parser.language = lang_obj
            self.langs[lang_name] = lang_obj
            self.parsers[lang_name] = parser

    def _determine_language(self, ext: str) -> str:
        """Maps a file extension (case-insensitive) to a language key, or 'unsupported'."""
        return self._EXTENSION_LANGUAGES.get(ext.lower(), "unsupported")

    @staticmethod
    def _node_text(node: "tree_sitter.Node", source: bytes) -> str:
        """Returns the source text spanned by ``node``."""
        return source[node.start_byte : node.end_byte].decode("utf-8")

    @staticmethod
    def _extract_vue_script(content: str) -> str:
        """
        Returns the concatenated bodies of all ``<script>`` blocks of a Vue SFC.

        The template and style sections are not TypeScript; feeding them to the
        parser only produces error nodes. Returns '' when no script block exists.
        """
        import re

        blocks = re.findall(
            r"<script\b[^>]*>(.*?)</script\s*>",
            content,
            flags=re.DOTALL | re.IGNORECASE,
        )
        return "\n".join(blocks)

    @staticmethod
    def _module_chunk(content: str, called: List[str]) -> Dict[str, Any]:
        """Builds the whole-file fallback chunk."""
        return {
            "chunk_type": "file",
            "name": "module",
            "content": content,
            "functions_defined": [],
            "functions_called": called,
        }

    def parse_file(self, file_path: str, content: str) -> List[Dict[str, Any]]:
        """
        Parses raw code and extracts semantic chunks (functions/classes) with deterministic call graphs.

        Args:
            file_path: Path of the file; only its extension is inspected.
            content: Decoded text of the file.

        Returns:
            One dict per semantic chunk in source order, or a single ``file``
            chunk when the language is unsupported, its grammar is unavailable,
            or no function/class was found.
        """
        ext = os.path.splitext(file_path)[1]
        lang = self._determine_language(ext)

        parser = self.parsers.get(lang)
        if parser is None:
            return [self._module_chunk(content, [])]

        code = self._extract_vue_script(content) if lang == "vue" else content
        source = code.encode("utf-8")
        tree = parser.parse(source)

        chunks: List[Dict[str, Any]] = []
        self._extract_chunks(tree.root_node, lang, source, chunks)

        # If no semantic chunks were found, fall back to the whole file as a single module chunk
        if not chunks:
            called: Set[str] = set()
            self._traverse_for_calls(tree.root_node, lang, source, called)
            chunks.append(self._module_chunk(content, sorted(called)))

        return chunks

    def _classify_chunk(
        self, node: "tree_sitter.Node", lang: str, source: bytes
    ) -> Any:
        """
        Decides whether ``node`` is a semantic chunk.

        Returns:
            ``(chunk_type, names)`` when it is, else ``None``. ``names`` may be
            empty when the grammar exposes no name for the node.
        """
        node_type = node.type
        if lang == "python":
            kinds = self._PYTHON_CHUNKS
        elif lang == "go":
            kinds = self._GO_CHUNKS
        elif lang in self._JS_FAMILY:
            kinds = self._JS_CHUNKS
        else:
            return None

        chunk_type = kinds.get(node_type)
        if chunk_type is not None:
            if node_type == "type_declaration":
                # Go keeps the name on the type_spec/type_alias children; a
                # grouped `type ( ... )` declaration may define several types.
                name_nodes = [
                    spec.child_by_field_name("name")
                    for spec in node.children
                    if spec.type in ("type_spec", "type_alias")
                ]
            else:
                name_nodes = [node.child_by_field_name("name")]
            names = [
                self._node_text(n, source) for n in name_nodes if n is not None
            ]
            return chunk_type, [name for name in names if name]

        if lang in self._JS_FAMILY and node_type == "variable_declarator":
            # `const foo = (...) => {...}` is treated as a named function.
            name_node = node.child_by_field_name("name")
            val_node = node.child_by_field_name("value")
            if (
                name_node is not None
                and val_node is not None
                and val_node.type == "arrow_function"
            ):
                name = self._node_text(name_node, source)
                return "function", [name] if name else []

        return None

    def _extract_chunks(
        self,
        node: "tree_sitter.Node",
        lang: str,
        source: bytes,
        chunks: List[Dict[str, Any]],
    ) -> None:
        """
        Pre-order AST walk for semantic chunking; appends results to ``chunks``.

        Uses an explicit stack so deeply nested trees cannot exhaust Python's
        recursion limit.
        """
        stack = [node]
        while stack:
            current = stack.pop()
            classified = self._classify_chunk(current, lang, source)
            if classified is not None:
                chunk_type, names = classified

                called: Set[str] = set()
                # Traverse specifically inside this chunk's boundaries to extract calls
                self._traverse_for_calls(current, lang, source, called)

                chunks.append(
                    {
                        "chunk_type": chunk_type,
                        "name": names[0] if names else "anonymous",
                        # Only this node's source, to keep the context window tight
                        "content": self._node_text(current, source),
                        "functions_defined": names,
                        "functions_called": sorted(called),
                    }
                )

            # Keep descending to find nested functions/classes (e.g. methods
            # inside a class); reversed() preserves source order on the stack.
            stack.extend(reversed(current.children))

    def _traverse_for_calls(
        self, node: "tree_sitter.Node", lang: str, source: bytes, called: Set[str]
    ) -> None:
        """
        Collects call targets, with short argument lists, found under ``node`` into ``called``.

        ``lang`` is accepted for interface compatibility; call nodes of every
        supported grammar are recognised.
        """
        stack = [node]
        while stack:
            current = stack.pop()
            stack.extend(current.children)

            if current.type not in self._CALL_NODE_TYPES:
                continue
            func_node = current.child_by_field_name("function")
            if func_node is None:
                continue

            call_text = self._extract_call_chain(func_node, source)
            if not call_text or call_text in self._IGNORED_CALLS:
                continue

            # Signature Extraction: pull the arguments as well
            args_node = current.child_by_field_name("arguments")
            if args_node is not None:
                # Collapse whitespace/newlines to a single space
                args_text = " ".join(self._node_text(args_node, source).split())
                if len(args_text) < 60:
                    call_text = f"{call_text}{args_text}"

            if len(call_text) < 120 and "\n" not in call_text:
                called.add(call_text)

    def _extract_call_chain(self, node: "tree_sitter.Node", source: bytes) -> str:
        """Recursively builds pure string representation of chained object method calls."""
        node_type = node.type
        if node_type == "identifier":
            return self._node_text(node, source)

        member_fields = self._MEMBER_FIELDS.get(node_type)
        if member_fields is not None:
            obj_node = node.child_by_field_name(member_fields[0])
            attr_node = node.child_by_field_name(member_fields[1])
            if obj_node is not None and attr_node is not None:
                obj_str = self._extract_call_chain(obj_node, source)
                return f"{obj_str}.{self._node_text(attr_node, source)}"
        elif node_type in self._CALL_NODE_TYPES:
            # `a.b().c` -> follow the inner call's target so the chain reads `a.b.c`.
            func_node = node.child_by_field_name("function")
            if func_node is not None:
                return self._extract_call_chain(func_node, source)

        # Anything else (subscripts, parenthesised expressions, ...) is kept verbatim.
        return self._node_text(node, source).strip()
@@ -0,0 +1,189 @@
1
+ """
2
+ Graph RAG orchestration module.
3
+ Scans physical codebases, leverages the ASTParser to extract functional flow maps,
4
+ and bulk indexes the enriched documents into Elasticsearch.
5
+ Updated with Semantic Chunking and BM25 Support.
6
+ """
7
+
8
+ import os
9
+ import json
10
+ import logging
11
+ from typing import List, Dict, Any, Generator
12
+ from elastro.core.client import ElasticsearchClient
13
+ from elastro.core.rag.ast_parser import ASTParser
14
+
15
+ logger = logging.getLogger("elastro.rag")
16
+
17
+
18
class GraphRAGManager:
    """
    Orchestrates Graph RAG ingestion of a source repository into Elasticsearch.

    Walks a repository, asks ``ASTParser`` for the semantic chunks of every
    supported file and bulk-indexes one document per chunk into ``index_name``.
    """

    def __init__(
        self, client: "ElasticsearchClient", index_name: str = "fremen_codebase_rag"
    ):
        """
        Args:
            client: Wrapper whose ``.client`` attribute is used for the
                ``ingest``/``indices`` APIs and passed to the bulk helper.
            index_name: Target index for the chunk documents.
        """
        self.client = client
        self.index_name = index_name
        self.ast_parser = ASTParser()

        # We explicitly target the agent's core polyglot languages
        self.supported_extensions = {".py", ".go", ".vue", ".js", ".ts", ".jsx", ".tsx"}

        # Prevent indexing binary data, large dependency folders, or generated outputs
        self.ignore_dirs = {
            ".git",
            "node_modules",
            "venv",
            ".venv",
            "env",
            "__pycache__",
            "dist",
            "build",
            ".next",
            ".nuxt",
            "coverage",
            ".idea",
            ".vscode",
        }

    def _should_ignore(self, path: str) -> bool:
        """Returns True when any component of ``path`` is listed in ``ignore_dirs``."""
        parts = os.path.normpath(path).split(os.sep)
        return any(part in self.ignore_dirs for part in parts)

    def scaffold_index(self) -> None:
        """
        Creates the Graph RAG index if it does not already exist.

        First tries to register an ELSER inference pipeline; if that fails the
        index is created without a default pipeline or embedding field.
        """
        pipeline_deployed = False
        # 1. Attempt to build the ELSER inference pipeline for Hybrid Search
        try:
            self.client.client.ingest.put_pipeline(
                id="elastro-elser-v2",
                description="Process text via ELSER for Dense Vector Hybrid Search",
                processors=[
                    {
                        "inference": {
                            "model_id": ".elser_model_2",
                            # input_output writes the token map straight into
                            # `content_embedding`, matching the sparse_vector
                            # mapping below. The legacy target_field/field_map
                            # form nests it under `content_embedding.tokens`
                            # next to a `model_id` string, which that mapping
                            # cannot accept.
                            "input_output": [
                                {
                                    "input_field": "content",
                                    "output_field": "content_embedding",
                                }
                            ],
                            "ignore_missing": True,
                            "ignore_failure": True,
                        }
                    }
                ],
            )
            logger.info(
                "Successfully deployed ELSER inference pipeline framework (Enterprise Mode)."
            )
            pipeline_deployed = True
        except Exception as e:
            # Deliberately best-effort: any failure downgrades to BM25 mode.
            logger.warning(
                "Could not scaffold ELSER pipeline (requires ML nodes or Platinum/Enterprise license). Falling back to Open Source BM25 Mode. Error: %s",
                e,
            )

        # Nothing to do when the index is already there
        if self.client.client.indices.exists(index=self.index_name):
            return

        logger.info("Creating Graph RAG index '%s'", self.index_name)

        settings: Dict[str, Any] = {"number_of_shards": 1, "number_of_replicas": 0}
        mappings: Dict[str, Any] = {
            "properties": {
                "repo_name": {"type": "keyword"},
                "file_path": {"type": "keyword"},
                "extension": {"type": "keyword"},
                "chunk_type": {"type": "keyword"},
                "chunk_name": {"type": "keyword"},
                "content": {"type": "text"},
                "functions_defined": {"type": "keyword"},
                "functions_called": {"type": "keyword"},
            }
        }
        if pipeline_deployed:
            settings["default_pipeline"] = "elastro-elser-v2"
            mappings["properties"]["content_embedding"] = {"type": "sparse_vector"}

        # We enforce standard RAG architecture with the Semantic Chunking fields.
        self.client.client.indices.create(
            index=self.index_name, body={"settings": settings, "mappings": mappings}
        )

    def scan_and_yield(self, repo_path: str) -> Generator[Dict[str, Any], None, None]:
        """
        Walks the repository sequentially. Applies Semantic Chunking and AST parsing.
        Yields Elasticsearch Bulk API formatted operation dictionaries.

        Ignore rules are evaluated relative to ``repo_path`` so that a
        repository stored under e.g. ``~/build/`` or ``~/env/`` is still
        ingested. Files that cannot be decoded as UTF-8 are skipped silently;
        other read/parse errors are logged and the file is skipped.
        """
        # Ensure we have absolute paths.
        repo_path = os.path.abspath(repo_path)
        repo_name = os.path.basename(repo_path)

        for root, dirs, files in os.walk(repo_path):
            # Prune ignored directories in-place to avoid deep pointless
            # traversals; sorting makes the ingestion order deterministic.
            dirs[:] = sorted(d for d in dirs if d not in self.ignore_dirs)

            for file in sorted(files):
                ext = os.path.splitext(file)[1].lower()
                if ext not in self.supported_extensions:
                    continue

                file_path = os.path.join(root, file)
                rel_path = os.path.relpath(file_path, repo_path)
                if self._should_ignore(rel_path):
                    continue

                # Only reading and parsing are guarded; the yields below must
                # not be wrapped so consumer-side errors are never swallowed.
                try:
                    with open(file_path, "r", encoding="utf-8") as f:
                        content = f.read()

                    # 1. Engage the AST parser to extract the "Graph" semantic chunks
                    chunks = self.ast_parser.parse_file(file_path, content)
                except UnicodeDecodeError:
                    # Skip binary files that masquerade as text extensions
                    continue
                except Exception as e:
                    logger.error("Error parsing Graph RAG for %s: %s", file_path, e)
                    continue

                # 2. Yield the Bulk API _index action for each specific chunk
                for i, chunk in enumerate(chunks):
                    yield {
                        "_op_type": "index",
                        "_index": self.index_name,
                        "_id": f"{repo_name}:{rel_path}::{i}",
                        "_source": {
                            "repo_name": repo_name,
                            "file_path": rel_path,
                            "extension": ext,
                            "chunk_type": chunk.get("chunk_type", "file"),
                            "chunk_name": chunk.get("name", "module"),
                            "content": chunk.get("content", content),
                            "functions_defined": chunk.get("functions_defined", []),
                            "functions_called": chunk.get("functions_called", []),
                        },
                    }

    def ingest_repository(self, repo_path: str) -> int:
        """
        Orchestrates Elasticsearch bulk insertion directly leveraging the client helpers.

        Returns:
            The number of chunk documents the bulk helper reports as indexed.
        """
        from elasticsearch.helpers import bulk

        # Fire up the index blueprint
        self.scaffold_index()

        # Extract AST Flows & stream them to Elasticsearch
        success_count, _failed_inserts = bulk(
            self.client.client,
            self.scan_and_yield(repo_path),
            chunk_size=500,  # Large batch sizes for fast performance on local M4 Mini
            stats_only=True,
        )

        return success_count
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: elastro-client
3
- Version: 1.3.32
3
+ Version: 1.3.34
4
4
  Summary: A comprehensive Python library for Elasticsearch management with both programmatic and CLI interfaces
5
5
  Author: Austin Jorgensen
6
6
  License-Expression: MIT
@@ -29,6 +29,11 @@ Requires-Dist: rich>=10.0.0
29
29
  Requires-Dist: rich-click>=1.7.0
30
30
  Requires-Dist: fastapi>=0.111.0
31
31
  Requires-Dist: uvicorn>=0.30.0
32
+ Requires-Dist: tree-sitter>=0.23.0
33
+ Requires-Dist: tree-sitter-python>=0.23.0
34
+ Requires-Dist: tree-sitter-go>=0.23.0
35
+ Requires-Dist: tree-sitter-javascript>=0.23.0
36
+ Requires-Dist: tree-sitter-typescript>=0.23.0
32
37
  Provides-Extra: test
33
38
  Requires-Dist: pytest>=7.0.0; extra == "test"
34
39
  Requires-Dist: pytest-cov>=3.0.0; extra == "test"
@@ -37,6 +37,7 @@ elastro/cli/commands/index_recipes.py
37
37
  elastro/cli/commands/ingest.py
38
38
  elastro/cli/commands/ml.py
39
39
  elastro/cli/commands/painless_commands.py
40
+ elastro/cli/commands/rag.py
40
41
  elastro/cli/commands/script.py
41
42
  elastro/cli/commands/security.py
42
43
  elastro/cli/commands/snapshot.py
@@ -58,6 +59,8 @@ elastro/core/logger.py
58
59
  elastro/core/query_builder.py
59
60
  elastro/core/snapshot.py
60
61
  elastro/core/validation.py
62
+ elastro/core/rag/ast_parser.py
63
+ elastro/core/rag/ingestor.py
61
64
  elastro/gui/elastro.svg
62
65
  elastro/gui/favicon.ico
63
66
  elastro/gui/index.html
@@ -8,6 +8,11 @@ rich>=10.0.0
8
8
  rich-click>=1.7.0
9
9
  fastapi>=0.111.0
10
10
  uvicorn>=0.30.0
11
+ tree-sitter>=0.23.0
12
+ tree-sitter-python>=0.23.0
13
+ tree-sitter-go>=0.23.0
14
+ tree-sitter-javascript>=0.23.0
15
+ tree-sitter-typescript>=0.23.0
11
16
 
12
17
  [dev]
13
18
  pytest>=7.0.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "elastro-client"
7
- version = "1.3.32"
7
+ version = "1.3.34"
8
8
  description = "A comprehensive Python library for Elasticsearch management with both programmatic and CLI interfaces"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9,<3.14"
@@ -36,6 +36,11 @@ dependencies = [
36
36
  "rich-click>=1.7.0",
37
37
  "fastapi>=0.111.0",
38
38
  "uvicorn>=0.30.0",
39
+ "tree-sitter>=0.23.0",
40
+ "tree-sitter-python>=0.23.0",
41
+ "tree-sitter-go>=0.23.0",
42
+ "tree-sitter-javascript>=0.23.0",
43
+ "tree-sitter-typescript>=0.23.0",
39
44
  ]
40
45
 
41
46
  [project.optional-dependencies]
File without changes