alita-sdk 0.3.379__py3-none-any.whl → 0.3.627__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278) hide show
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent/__init__.py +5 -0
  4. alita_sdk/cli/agent/default.py +258 -0
  5. alita_sdk/cli/agent_executor.py +156 -0
  6. alita_sdk/cli/agent_loader.py +245 -0
  7. alita_sdk/cli/agent_ui.py +228 -0
  8. alita_sdk/cli/agents.py +3113 -0
  9. alita_sdk/cli/callbacks.py +647 -0
  10. alita_sdk/cli/cli.py +168 -0
  11. alita_sdk/cli/config.py +306 -0
  12. alita_sdk/cli/context/__init__.py +30 -0
  13. alita_sdk/cli/context/cleanup.py +198 -0
  14. alita_sdk/cli/context/manager.py +731 -0
  15. alita_sdk/cli/context/message.py +285 -0
  16. alita_sdk/cli/context/strategies.py +289 -0
  17. alita_sdk/cli/context/token_estimation.py +127 -0
  18. alita_sdk/cli/formatting.py +182 -0
  19. alita_sdk/cli/input_handler.py +419 -0
  20. alita_sdk/cli/inventory.py +1073 -0
  21. alita_sdk/cli/mcp_loader.py +315 -0
  22. alita_sdk/cli/testcases/__init__.py +94 -0
  23. alita_sdk/cli/testcases/data_generation.py +119 -0
  24. alita_sdk/cli/testcases/discovery.py +96 -0
  25. alita_sdk/cli/testcases/executor.py +84 -0
  26. alita_sdk/cli/testcases/logger.py +85 -0
  27. alita_sdk/cli/testcases/parser.py +172 -0
  28. alita_sdk/cli/testcases/prompts.py +91 -0
  29. alita_sdk/cli/testcases/reporting.py +125 -0
  30. alita_sdk/cli/testcases/setup.py +108 -0
  31. alita_sdk/cli/testcases/test_runner.py +282 -0
  32. alita_sdk/cli/testcases/utils.py +39 -0
  33. alita_sdk/cli/testcases/validation.py +90 -0
  34. alita_sdk/cli/testcases/workflow.py +196 -0
  35. alita_sdk/cli/toolkit.py +327 -0
  36. alita_sdk/cli/toolkit_loader.py +85 -0
  37. alita_sdk/cli/tools/__init__.py +43 -0
  38. alita_sdk/cli/tools/approval.py +224 -0
  39. alita_sdk/cli/tools/filesystem.py +1751 -0
  40. alita_sdk/cli/tools/planning.py +389 -0
  41. alita_sdk/cli/tools/terminal.py +414 -0
  42. alita_sdk/community/__init__.py +72 -12
  43. alita_sdk/community/inventory/__init__.py +236 -0
  44. alita_sdk/community/inventory/config.py +257 -0
  45. alita_sdk/community/inventory/enrichment.py +2137 -0
  46. alita_sdk/community/inventory/extractors.py +1469 -0
  47. alita_sdk/community/inventory/ingestion.py +3172 -0
  48. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  49. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  50. alita_sdk/community/inventory/parsers/base.py +295 -0
  51. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  52. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  53. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  54. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  55. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  56. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  57. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  58. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  59. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  60. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  61. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  62. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  63. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  64. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  65. alita_sdk/community/inventory/patterns/loader.py +348 -0
  66. alita_sdk/community/inventory/patterns/registry.py +198 -0
  67. alita_sdk/community/inventory/presets.py +535 -0
  68. alita_sdk/community/inventory/retrieval.py +1403 -0
  69. alita_sdk/community/inventory/toolkit.py +173 -0
  70. alita_sdk/community/inventory/toolkit_utils.py +176 -0
  71. alita_sdk/community/inventory/visualize.py +1370 -0
  72. alita_sdk/configurations/__init__.py +1 -1
  73. alita_sdk/configurations/ado.py +141 -20
  74. alita_sdk/configurations/bitbucket.py +94 -2
  75. alita_sdk/configurations/confluence.py +130 -1
  76. alita_sdk/configurations/figma.py +76 -0
  77. alita_sdk/configurations/gitlab.py +91 -0
  78. alita_sdk/configurations/jira.py +103 -0
  79. alita_sdk/configurations/openapi.py +329 -0
  80. alita_sdk/configurations/qtest.py +72 -1
  81. alita_sdk/configurations/report_portal.py +96 -0
  82. alita_sdk/configurations/sharepoint.py +148 -0
  83. alita_sdk/configurations/testio.py +83 -0
  84. alita_sdk/configurations/testrail.py +88 -0
  85. alita_sdk/configurations/xray.py +93 -0
  86. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  87. alita_sdk/configurations/zephyr_essential.py +75 -0
  88. alita_sdk/runtime/clients/artifact.py +3 -3
  89. alita_sdk/runtime/clients/client.py +388 -46
  90. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  91. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  92. alita_sdk/runtime/clients/sandbox_client.py +8 -21
  93. alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
  94. alita_sdk/runtime/langchain/assistant.py +157 -39
  95. alita_sdk/runtime/langchain/constants.py +647 -1
  96. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  97. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  98. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
  99. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -4
  100. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +226 -7
  101. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
  102. alita_sdk/runtime/langchain/document_loaders/constants.py +40 -19
  103. alita_sdk/runtime/langchain/langraph_agent.py +405 -84
  104. alita_sdk/runtime/langchain/utils.py +106 -7
  105. alita_sdk/runtime/llms/preloaded.py +2 -6
  106. alita_sdk/runtime/models/mcp_models.py +61 -0
  107. alita_sdk/runtime/skills/__init__.py +91 -0
  108. alita_sdk/runtime/skills/callbacks.py +498 -0
  109. alita_sdk/runtime/skills/discovery.py +540 -0
  110. alita_sdk/runtime/skills/executor.py +610 -0
  111. alita_sdk/runtime/skills/input_builder.py +371 -0
  112. alita_sdk/runtime/skills/models.py +330 -0
  113. alita_sdk/runtime/skills/registry.py +355 -0
  114. alita_sdk/runtime/skills/skill_runner.py +330 -0
  115. alita_sdk/runtime/toolkits/__init__.py +31 -0
  116. alita_sdk/runtime/toolkits/application.py +29 -10
  117. alita_sdk/runtime/toolkits/artifact.py +20 -11
  118. alita_sdk/runtime/toolkits/datasource.py +13 -6
  119. alita_sdk/runtime/toolkits/mcp.py +783 -0
  120. alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
  121. alita_sdk/runtime/toolkits/planning.py +178 -0
  122. alita_sdk/runtime/toolkits/skill_router.py +238 -0
  123. alita_sdk/runtime/toolkits/subgraph.py +251 -6
  124. alita_sdk/runtime/toolkits/tools.py +356 -69
  125. alita_sdk/runtime/toolkits/vectorstore.py +11 -5
  126. alita_sdk/runtime/tools/__init__.py +10 -3
  127. alita_sdk/runtime/tools/application.py +27 -6
  128. alita_sdk/runtime/tools/artifact.py +511 -28
  129. alita_sdk/runtime/tools/data_analysis.py +183 -0
  130. alita_sdk/runtime/tools/function.py +67 -35
  131. alita_sdk/runtime/tools/graph.py +10 -4
  132. alita_sdk/runtime/tools/image_generation.py +148 -46
  133. alita_sdk/runtime/tools/llm.py +1003 -128
  134. alita_sdk/runtime/tools/loop.py +3 -1
  135. alita_sdk/runtime/tools/loop_output.py +3 -1
  136. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  137. alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
  138. alita_sdk/runtime/tools/mcp_server_tool.py +8 -5
  139. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  140. alita_sdk/runtime/tools/planning/models.py +246 -0
  141. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  142. alita_sdk/runtime/tools/router.py +2 -4
  143. alita_sdk/runtime/tools/sandbox.py +65 -48
  144. alita_sdk/runtime/tools/skill_router.py +776 -0
  145. alita_sdk/runtime/tools/tool.py +3 -1
  146. alita_sdk/runtime/tools/vectorstore.py +9 -3
  147. alita_sdk/runtime/tools/vectorstore_base.py +70 -14
  148. alita_sdk/runtime/utils/AlitaCallback.py +137 -21
  149. alita_sdk/runtime/utils/constants.py +5 -1
  150. alita_sdk/runtime/utils/mcp_client.py +492 -0
  151. alita_sdk/runtime/utils/mcp_oauth.py +361 -0
  152. alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
  153. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  154. alita_sdk/runtime/utils/serialization.py +155 -0
  155. alita_sdk/runtime/utils/streamlit.py +40 -13
  156. alita_sdk/runtime/utils/toolkit_utils.py +30 -9
  157. alita_sdk/runtime/utils/utils.py +36 -0
  158. alita_sdk/tools/__init__.py +134 -35
  159. alita_sdk/tools/ado/repos/__init__.py +51 -32
  160. alita_sdk/tools/ado/repos/repos_wrapper.py +148 -89
  161. alita_sdk/tools/ado/test_plan/__init__.py +25 -9
  162. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
  163. alita_sdk/tools/ado/utils.py +1 -18
  164. alita_sdk/tools/ado/wiki/__init__.py +25 -12
  165. alita_sdk/tools/ado/wiki/ado_wrapper.py +291 -22
  166. alita_sdk/tools/ado/work_item/__init__.py +26 -13
  167. alita_sdk/tools/ado/work_item/ado_wrapper.py +73 -11
  168. alita_sdk/tools/advanced_jira_mining/__init__.py +11 -8
  169. alita_sdk/tools/aws/delta_lake/__init__.py +13 -9
  170. alita_sdk/tools/aws/delta_lake/tool.py +5 -1
  171. alita_sdk/tools/azure_ai/search/__init__.py +11 -8
  172. alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
  173. alita_sdk/tools/base/tool.py +5 -1
  174. alita_sdk/tools/base_indexer_toolkit.py +271 -84
  175. alita_sdk/tools/bitbucket/__init__.py +17 -11
  176. alita_sdk/tools/bitbucket/api_wrapper.py +59 -11
  177. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +49 -35
  178. alita_sdk/tools/browser/__init__.py +5 -4
  179. alita_sdk/tools/carrier/__init__.py +5 -6
  180. alita_sdk/tools/carrier/backend_reports_tool.py +6 -6
  181. alita_sdk/tools/carrier/run_ui_test_tool.py +6 -6
  182. alita_sdk/tools/carrier/ui_reports_tool.py +5 -5
  183. alita_sdk/tools/chunkers/__init__.py +3 -1
  184. alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
  185. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  186. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  187. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  188. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  189. alita_sdk/tools/cloud/aws/__init__.py +10 -7
  190. alita_sdk/tools/cloud/azure/__init__.py +10 -7
  191. alita_sdk/tools/cloud/gcp/__init__.py +10 -7
  192. alita_sdk/tools/cloud/k8s/__init__.py +10 -7
  193. alita_sdk/tools/code/linter/__init__.py +10 -8
  194. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  195. alita_sdk/tools/code/sonar/__init__.py +11 -8
  196. alita_sdk/tools/code_indexer_toolkit.py +82 -22
  197. alita_sdk/tools/confluence/__init__.py +22 -16
  198. alita_sdk/tools/confluence/api_wrapper.py +107 -30
  199. alita_sdk/tools/confluence/loader.py +14 -2
  200. alita_sdk/tools/custom_open_api/__init__.py +12 -5
  201. alita_sdk/tools/elastic/__init__.py +11 -8
  202. alita_sdk/tools/elitea_base.py +493 -30
  203. alita_sdk/tools/figma/__init__.py +58 -11
  204. alita_sdk/tools/figma/api_wrapper.py +1235 -143
  205. alita_sdk/tools/figma/figma_client.py +73 -0
  206. alita_sdk/tools/figma/toon_tools.py +2748 -0
  207. alita_sdk/tools/github/__init__.py +14 -15
  208. alita_sdk/tools/github/github_client.py +224 -100
  209. alita_sdk/tools/github/graphql_client_wrapper.py +119 -33
  210. alita_sdk/tools/github/schemas.py +14 -5
  211. alita_sdk/tools/github/tool.py +5 -1
  212. alita_sdk/tools/github/tool_prompts.py +9 -22
  213. alita_sdk/tools/gitlab/__init__.py +16 -11
  214. alita_sdk/tools/gitlab/api_wrapper.py +218 -48
  215. alita_sdk/tools/gitlab_org/__init__.py +10 -9
  216. alita_sdk/tools/gitlab_org/api_wrapper.py +63 -64
  217. alita_sdk/tools/google/bigquery/__init__.py +13 -12
  218. alita_sdk/tools/google/bigquery/tool.py +5 -1
  219. alita_sdk/tools/google_places/__init__.py +11 -8
  220. alita_sdk/tools/google_places/api_wrapper.py +1 -1
  221. alita_sdk/tools/jira/__init__.py +17 -10
  222. alita_sdk/tools/jira/api_wrapper.py +92 -41
  223. alita_sdk/tools/keycloak/__init__.py +11 -8
  224. alita_sdk/tools/localgit/__init__.py +9 -3
  225. alita_sdk/tools/localgit/local_git.py +62 -54
  226. alita_sdk/tools/localgit/tool.py +5 -1
  227. alita_sdk/tools/memory/__init__.py +12 -4
  228. alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
  229. alita_sdk/tools/ocr/__init__.py +11 -8
  230. alita_sdk/tools/openapi/__init__.py +491 -106
  231. alita_sdk/tools/openapi/api_wrapper.py +1368 -0
  232. alita_sdk/tools/openapi/tool.py +20 -0
  233. alita_sdk/tools/pandas/__init__.py +20 -12
  234. alita_sdk/tools/pandas/api_wrapper.py +38 -25
  235. alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
  236. alita_sdk/tools/postman/__init__.py +10 -9
  237. alita_sdk/tools/pptx/__init__.py +11 -10
  238. alita_sdk/tools/pptx/pptx_wrapper.py +1 -1
  239. alita_sdk/tools/qtest/__init__.py +31 -11
  240. alita_sdk/tools/qtest/api_wrapper.py +2135 -86
  241. alita_sdk/tools/rally/__init__.py +10 -9
  242. alita_sdk/tools/rally/api_wrapper.py +1 -1
  243. alita_sdk/tools/report_portal/__init__.py +12 -8
  244. alita_sdk/tools/salesforce/__init__.py +10 -8
  245. alita_sdk/tools/servicenow/__init__.py +17 -15
  246. alita_sdk/tools/servicenow/api_wrapper.py +1 -1
  247. alita_sdk/tools/sharepoint/__init__.py +10 -7
  248. alita_sdk/tools/sharepoint/api_wrapper.py +129 -38
  249. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  250. alita_sdk/tools/sharepoint/utils.py +8 -2
  251. alita_sdk/tools/slack/__init__.py +10 -7
  252. alita_sdk/tools/slack/api_wrapper.py +2 -2
  253. alita_sdk/tools/sql/__init__.py +12 -9
  254. alita_sdk/tools/testio/__init__.py +10 -7
  255. alita_sdk/tools/testrail/__init__.py +11 -10
  256. alita_sdk/tools/testrail/api_wrapper.py +1 -1
  257. alita_sdk/tools/utils/__init__.py +9 -4
  258. alita_sdk/tools/utils/content_parser.py +103 -18
  259. alita_sdk/tools/utils/text_operations.py +410 -0
  260. alita_sdk/tools/utils/tool_prompts.py +79 -0
  261. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +30 -13
  262. alita_sdk/tools/xray/__init__.py +13 -9
  263. alita_sdk/tools/yagmail/__init__.py +9 -3
  264. alita_sdk/tools/zephyr/__init__.py +10 -7
  265. alita_sdk/tools/zephyr_enterprise/__init__.py +11 -7
  266. alita_sdk/tools/zephyr_essential/__init__.py +10 -7
  267. alita_sdk/tools/zephyr_essential/api_wrapper.py +30 -13
  268. alita_sdk/tools/zephyr_essential/client.py +2 -2
  269. alita_sdk/tools/zephyr_scale/__init__.py +11 -8
  270. alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
  271. alita_sdk/tools/zephyr_squad/__init__.py +10 -7
  272. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/METADATA +154 -8
  273. alita_sdk-0.3.627.dist-info/RECORD +468 -0
  274. alita_sdk-0.3.627.dist-info/entry_points.txt +2 -0
  275. alita_sdk-0.3.379.dist-info/RECORD +0 -360
  276. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/WHEEL +0 -0
  277. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/licenses/LICENSE +0 -0
  278. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,236 @@
1
+ """
2
+ Inventory Module for Knowledge Graph Construction and Retrieval.
3
+
4
+ This module provides two distinct capabilities:
5
+
6
+ 1. **Ingestion Pipeline** - A workflow for building/updating knowledge graphs
7
+ from source code repositories. NOT a toolkit - it's a defined process.
8
+
9
+ Usage:
10
+ from alita_sdk.community.inventory import (
11
+ IngestionPipeline,
12
+ ingest_repository,
13
+ PYTHON_PRESET,
14
+ TYPESCRIPT_PRESET,
15
+ get_preset
16
+ )
17
+
18
+ # Full pipeline with config
19
+ pipeline = IngestionPipeline(
20
+ llm=llm,
21
+ graph_path="./graph.json",
22
+ source_toolkits={'github': github_toolkit}
23
+ )
24
+ result = pipeline.run(source='github', branch='main')
25
+
26
+ # Or one-shot convenience function
27
+ result = ingest_repository(
28
+ llm=llm,
29
+ graph_path="./graph.json",
30
+ source_toolkit=github_toolkit,
31
+ source_name="github"
32
+ )
33
+
34
+ 2. **Retrieval Toolkit** - A pure query toolkit for retrieving context from
35
+ a pre-built knowledge graph. Can be added to any agent.
36
+
37
+ Usage:
38
+ from alita_sdk.community.inventory import InventoryRetrievalToolkit
39
+
40
+ # As a toolkit for agents
41
+ toolkit = InventoryRetrievalToolkit.get_toolkit(
42
+ graph_path="./graph.json",
43
+ base_directory="/path/to/source" # For local content retrieval
44
+ )
45
+ tools = toolkit.get_tools()
46
+
47
+ Entity Taxonomy (8 layers, 55 types):
48
+ - Product Layer: feature, product, user_story, requirement, epic
49
+ - Domain Layer: domain, subdomain, business_capability, value_stream, process
50
+ - Service Layer: service, microservice, api, api_endpoint, message_queue, event
51
+ - Code Layer: module, package, class, function, method, interface, trait, enum, type, variable, constant
52
+ - Data Layer: database, table, collection, schema, model, entity, field, index, query, migration
53
+ - Testing Layer: test_suite, test_case, test_fixture, mock, stub, assertion
54
+ - Delivery Layer: pipeline, job, stage, environment, deployment, artifact, container
55
+ - Organization Layer: team, repository, project, workspace, organization
56
+
57
+ Relationship Taxonomy (8 categories, 38 types):
58
+ - Structural: CONTAINS, IMPORTS, EXTENDS, IMPLEMENTS, USES, DEPENDS_ON, INSTANTIATES, COMPOSED_OF
59
+ - Behavioral: CALLS, INVOKES, TRIGGERS, HANDLES, SUBSCRIBES_TO, PUBLISHES_TO, RETURNS
60
+ - Data Lineage: READS_FROM, WRITES_TO, TRANSFORMS, QUERIES, STORES_IN, REFERENCES
61
+ - UI/Product: RENDERS, ROUTES_TO, NAVIGATES_TO, DISPLAYS
62
+ - Testing: TESTS, MOCKS, COVERS, ASSERTS
63
+ - Ownership: OWNED_BY, MAINTAINED_BY, CREATED_BY
64
+ - Temporal: PRECEDES, FOLLOWS, SCHEDULED_BY
65
+ - Semantic: RELATED_TO, SIMILAR_TO, ALIAS_OF
66
+ """
67
+
68
+ import logging
69
+ from typing import List, Optional, Dict, Any
70
+
71
+ # Configuration
72
+ from .config import (
73
+ IngestionConfig,
74
+ GuardrailsConfig,
75
+ generate_config_template,
76
+ )
77
+
78
+ # Ingestion Pipeline - workflow for graph building
79
+ from .ingestion import (
80
+ IngestionPipeline,
81
+ IngestionResult,
82
+ ingest_repository,
83
+ )
84
+
85
+ # Retrieval Toolkit - for querying graphs
86
+ from .retrieval import InventoryRetrievalApiWrapper
87
+
88
+ # Toolkit utilities - for configuration and instantiation
89
+ from .toolkit_utils import (
90
+ load_toolkit_config,
91
+ get_llm_for_config,
92
+ get_source_toolkit,
93
+ )
94
+
95
+ # Core graph types
96
+ from .knowledge_graph import KnowledgeGraph, Citation
97
+
98
+ # Extractors (for advanced use)
99
+ from .extractors import (
100
+ ENTITY_TAXONOMY,
101
+ RELATIONSHIP_TAXONOMY,
102
+ EntityExtractor,
103
+ RelationExtractor,
104
+ FactExtractor,
105
+ DocumentClassifier,
106
+ EntitySchemaDiscoverer,
107
+ )
108
+
109
+ # Toolkit wrapper for agent integration
110
+ from .toolkit import InventoryRetrievalToolkit
111
+
112
+ # Ingestion presets
113
+ from .presets import (
114
+ PYTHON_PRESET,
115
+ PYTHON_PRESET_WITH_TESTS,
116
+ JAVASCRIPT_PRESET,
117
+ TYPESCRIPT_PRESET,
118
+ REACT_PRESET,
119
+ NEXTJS_PRESET,
120
+ JAVA_PRESET,
121
+ SPRING_BOOT_PRESET,
122
+ MAVEN_PRESET,
123
+ GRADLE_PRESET,
124
+ DOTNET_PRESET,
125
+ CSHARP_PRESET,
126
+ ASPNET_PRESET,
127
+ FULLSTACK_JS_PRESET,
128
+ MONOREPO_PRESET,
129
+ DOCUMENTATION_PRESET,
130
+ PRESETS,
131
+ get_preset,
132
+ list_presets,
133
+ combine_presets,
134
+ )
135
+
136
+ logger = logging.getLogger(__name__)
137
+
138
+ name = "inventory"
139
+
140
+
141
def get_tools(tool: dict, tools_list: Optional[List[dict]] = None):
    """
    Build the inventory retrieval tools described by a toolkit config.

    Entry point used by the toolkit loader: it reads the ``settings`` of the
    given toolkit configuration, constructs an ``InventoryRetrievalToolkit``
    from them and returns that toolkit's tools.

    NOTE: Ingestion is not exposed here. Use ``IngestionPipeline`` directly
    for building graphs; this function only covers retrieval.

    Args:
        tool: The inventory toolkit configuration dict. ``toolkit_name`` is
            read from the top level; ``graph_path``, ``base_directory``,
            ``selected_tools`` and ``source_toolkits`` are read from its
            ``settings`` sub-dict.
        tools_list: Optional list of all toolkit configs in the agent.
            Accepted for loader compatibility; not used by this function.

    Returns:
        The result of ``toolkit.get_tools()`` (documented upstream as a list
        of BaseTool instances for knowledge graph retrieval).
    """
    cfg = tool.get('settings', {})

    # A missing graph path is only reported, not rejected: construction of
    # the toolkit still proceeds with whatever value was configured.
    path_to_graph = cfg.get('graph_path')
    if not path_to_graph:
        logger.warning("Inventory toolkit requires graph_path setting for retrieval")

    toolkit_kwargs = {
        'selected_tools': cfg.get('selected_tools', []),
        'toolkit_name': tool.get('toolkit_name'),
        # Graph location
        'graph_path': path_to_graph,
        # For local content retrieval
        'base_directory': cfg.get('base_directory'),
        # Source toolkits for remote content retrieval (optional)
        'source_toolkits': cfg.get('source_toolkits', {}),
    }
    return InventoryRetrievalToolkit.get_toolkit(**toolkit_kwargs).get_tools()
176
+
177
+
178
+ __all__ = [
179
+ # Module name
180
+ 'name',
181
+ 'get_tools',
182
+
183
+ # Configuration
184
+ 'IngestionConfig',
185
+ 'GuardrailsConfig',
186
+ 'generate_config_template',
187
+
188
+ # Ingestion (workflow)
189
+ 'IngestionPipeline',
190
+ 'IngestionResult',
191
+ 'ingest_repository',
192
+
193
+ # Retrieval (toolkit)
194
+ 'InventoryRetrievalToolkit',
195
+ 'InventoryRetrievalApiWrapper',
196
+
197
+ # Toolkit utilities
198
+ 'load_toolkit_config',
199
+ 'get_llm_for_config',
200
+ 'get_source_toolkit',
201
+
202
+ # Core types
203
+ 'KnowledgeGraph',
204
+ 'Citation',
205
+
206
+ # Extractors
207
+ 'ENTITY_TAXONOMY',
208
+ 'RELATIONSHIP_TAXONOMY',
209
+ 'EntityExtractor',
210
+ 'RelationExtractor',
211
+ 'FactExtractor',
212
+ 'DocumentClassifier',
213
+ 'EntitySchemaDiscoverer',
214
+
215
+ # Presets
216
+ 'PYTHON_PRESET',
217
+ 'PYTHON_PRESET_WITH_TESTS',
218
+ 'JAVASCRIPT_PRESET',
219
+ 'TYPESCRIPT_PRESET',
220
+ 'REACT_PRESET',
221
+ 'NEXTJS_PRESET',
222
+ 'JAVA_PRESET',
223
+ 'SPRING_BOOT_PRESET',
224
+ 'MAVEN_PRESET',
225
+ 'GRADLE_PRESET',
226
+ 'DOTNET_PRESET',
227
+ 'CSHARP_PRESET',
228
+ 'ASPNET_PRESET',
229
+ 'FULLSTACK_JS_PRESET',
230
+ 'MONOREPO_PRESET',
231
+ 'DOCUMENTATION_PRESET',
232
+ 'PRESETS',
233
+ 'get_preset',
234
+ 'list_presets',
235
+ 'combine_presets',
236
+ ]
@@ -0,0 +1,257 @@
1
+ """
2
+ Configuration for Inventory Ingestion Pipeline.
3
+
4
+ Since the ingestion runs within Alita, the LLM and embeddings are provided
5
+ by the Alita client. Configuration only needs model names, not providers.
6
+
7
+ Usage:
8
+ # From YAML config file
9
+ config = IngestionConfig.from_yaml("./ingestion-config.yml")
10
+
11
+ # Programmatic
12
+ config = IngestionConfig(
13
+ llm_model="gpt-4o-mini",
14
+ embedding_model="text-embedding-3-small",
15
+ guardrails=GuardrailsConfig(
16
+ max_tokens_per_doc=8000,
17
+ max_entities_per_doc=50,
18
+ )
19
+ )
20
+
21
+ # Use in pipeline (Alita client provides LLM/embeddings)
22
+ pipeline = IngestionPipeline(
23
+ llm=alita.get_langchain_llm(config.llm_model),
24
+ embedding=alita.get_embeddings(config.embedding_model),
25
+ graph_path=config.graph_path,
26
+ guardrails=config.guardrails,
27
+ )
28
+ """
29
+
30
+ import os
31
+ import logging
32
+ from typing import Any, Optional, Dict, List
33
+ from pydantic import BaseModel, Field
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+
38
class GuardrailsConfig(BaseModel):
    """Guardrails configuration for safe and controlled extraction.

    Groups the per-document limits, content-filter patterns, rate limits,
    retry policy, validation switches and confidence thresholds used by the
    ingestion pipeline. Every field has a default, so ``GuardrailsConfig()``
    is a valid configuration.
    """

    # Token/content limits
    max_tokens_per_doc: int = Field(
        default=8000,
        description="Maximum tokens per document before chunking"
    )
    max_entities_per_doc: int = Field(
        default=50,
        description="Maximum entities to extract from a single document"
    )
    max_relations_per_doc: int = Field(
        default=100,
        description="Maximum relations to extract per document"
    )

    # Content filtering
    content_filter_enabled: bool = Field(
        default=True,
        description="Enable content filtering for PII/secrets"
    )
    filter_patterns: List[str] = Field(
        default_factory=lambda: [
            # Quoted credential assignments such as password = "..." or
            # api_key: '...'.
            r'(?i)(password|secret|api[_-]?key|token)\s*[=:]\s*["\'][^"\']+["\']',
            # E-mail addresses. The TLD class is [A-Za-z]: inside a character
            # class '|' is a literal pipe, not alternation, so it must not
            # appear there.
            r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
            # PEM private-key headers. The algorithm word is optional so the
            # unencrypted PKCS#8 form "-----BEGIN PRIVATE KEY-----" is caught
            # as well as "RSA", "EC", "OPENSSH", "ENCRYPTED", ...
            r'-----BEGIN (?:[A-Z]+ )?PRIVATE KEY-----',
        ],
        description="Regex patterns to filter from content before LLM processing"
    )

    # Rate limiting
    rate_limit_requests_per_minute: Optional[int] = Field(
        default=None,
        description="Max LLM requests per minute (None = unlimited)"
    )
    rate_limit_tokens_per_minute: Optional[int] = Field(
        default=None,
        description="Max tokens per minute (None = unlimited)"
    )

    # Error handling
    max_retries: int = Field(default=3, description="Max retries on LLM errors")
    retry_delay_seconds: float = Field(default=1.0, description="Delay between retries")
    skip_on_error: bool = Field(
        default=True,
        description="Skip document on extraction error vs fail pipeline"
    )

    # Validation
    validate_entity_types: bool = Field(
        default=True,
        description="Validate extracted entities against taxonomy"
    )
    validate_relation_types: bool = Field(
        default=True,
        description="Validate extracted relations against taxonomy"
    )

    # Deduplication
    deduplicate_entities: bool = Field(
        default=True,
        description="Merge duplicate entities by name+type+file"
    )

    # Confidence thresholds
    entity_confidence_threshold: float = Field(
        default=0.5,
        description="Minimum confidence for entity extraction"
    )
    relation_confidence_threshold: float = Field(
        default=0.5,
        description="Minimum confidence for relation extraction"
    )
112
+
113
+
114
class IngestionConfig(BaseModel):
    """
    Configuration for the ingestion pipeline.

    Since ingestion runs within Alita, only model names are needed.
    The Alita client handles provider details, API keys, etc.

    Instances can be built programmatically, or loaded with the
    ``from_yaml`` / ``from_json`` / ``from_env`` alternate constructors.
    All fields have defaults.
    """

    # Model names (Alita provides the actual LLM/embedding instances)
    llm_model: str = Field(
        default="gpt-4o-mini",
        description="LLM model name (e.g., gpt-4o-mini, claude-3-sonnet)"
    )
    embedding_model: Optional[str] = Field(
        default=None,
        description="Embedding model name (optional, for semantic search)"
    )

    # Model parameters
    temperature: float = Field(default=0.0, description="LLM temperature")

    # Guardrails configuration
    guardrails: GuardrailsConfig = Field(default_factory=GuardrailsConfig)

    # Graph configuration
    graph_path: str = Field(default="./knowledge_graph.json", description="Path to persist graph")
    auto_save: bool = Field(default=True, description="Auto-save after mutations")

    # Extraction settings
    extract_relations: bool = Field(default=True, description="Extract relations between entities")
    chunk_size: int = Field(default=4000, description="Document chunk size for processing")
    chunk_overlap: int = Field(default=200, description="Overlap between chunks")

    # Concurrency
    max_concurrent_extractions: int = Field(
        default=1,
        description="Max parallel extraction tasks (1 = sequential)"
    )

    @classmethod
    def from_yaml(cls, path: str) -> "IngestionConfig":
        """Load configuration from YAML file.

        An empty YAML document yields a configuration with all defaults.

        Args:
            path: Location of the YAML file, read as UTF-8.

        Returns:
            The configuration built from the file's top-level mapping.
        """
        import yaml

        with open(path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)

        # safe_load returns None for an empty document; treat that as
        # "no overrides" instead of failing on cls(**None).
        if data is None:
            data = {}

        return cls(**data)

    @classmethod
    def from_json(cls, path: str) -> "IngestionConfig":
        """Load configuration from JSON file.

        Args:
            path: Location of the JSON file, read as UTF-8.

        Returns:
            The configuration built from the file's top-level object.
        """
        import json

        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        return cls(**data)

    @classmethod
    def from_env(cls) -> "IngestionConfig":
        """
        Create configuration from environment variables.

        Boolean variables are true only when the value equals "true"
        (case-insensitive); any other value is treated as false. Numeric
        variables are parsed with ``int()`` / ``float()`` and raise
        ``ValueError`` when malformed.

        Environment variables:
            LLM_MODEL: Model name (default: gpt-4o-mini)
            EMBEDDING_MODEL: Embedding model name (optional)
            LLM_TEMPERATURE: Temperature (default: 0.0)
            GRAPH_PATH: Path to save graph (default: ./knowledge_graph.json)
            MAX_TOKENS_PER_DOC: Max tokens per doc (default: 8000)
            MAX_ENTITIES_PER_DOC: Max entities per doc (default: 50)
            MAX_RETRIES: Max retries on LLM errors (default: 3)
            CONTENT_FILTER_ENABLED: true/false (default: true)
            EXTRACT_RELATIONS: true/false (default: true)
        """
        guardrails = GuardrailsConfig(
            max_tokens_per_doc=int(os.environ.get('MAX_TOKENS_PER_DOC', '8000')),
            max_entities_per_doc=int(os.environ.get('MAX_ENTITIES_PER_DOC', '50')),
            content_filter_enabled=os.environ.get('CONTENT_FILTER_ENABLED', 'true').lower() == 'true',
            max_retries=int(os.environ.get('MAX_RETRIES', '3')),
        )

        return cls(
            llm_model=os.environ.get('LLM_MODEL', 'gpt-4o-mini'),
            embedding_model=os.environ.get('EMBEDDING_MODEL'),
            temperature=float(os.environ.get('LLM_TEMPERATURE', '0.0')),
            guardrails=guardrails,
            graph_path=os.environ.get('GRAPH_PATH', './knowledge_graph.json'),
            extract_relations=os.environ.get('EXTRACT_RELATIONS', 'true').lower() == 'true',
        )

    def to_yaml(self, path: str) -> None:
        """Save configuration to YAML file.

        Args:
            path: Destination file; overwritten if it exists, written as UTF-8.
        """
        import yaml

        with open(path, 'w', encoding='utf-8') as f:
            yaml.safe_dump(self.model_dump(), f, default_flow_style=False)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary."""
        return self.model_dump()
214
+
215
+
216
+ # Example YAML configuration template
217
+ EXAMPLE_CONFIG_YAML = """# Inventory Ingestion Configuration
218
+ # Model names only - Alita provides the actual LLM/embedding instances
219
+
220
+ # LLM model name (required)
221
+ llm_model: gpt-4o-mini
222
+ temperature: 0.0
223
+
224
+ # Embedding model (optional, for semantic search)
225
+ embedding_model: text-embedding-3-small
226
+
227
+ # Guardrails - safety and control
228
+ guardrails:
229
+ max_tokens_per_doc: 8000
230
+ max_entities_per_doc: 50
231
+ max_relations_per_doc: 100
232
+ content_filter_enabled: true
233
+ max_retries: 3
234
+ retry_delay_seconds: 1.0
235
+ skip_on_error: true
236
+ entity_confidence_threshold: 0.5
237
+ relation_confidence_threshold: 0.5
238
+ deduplicate_entities: true
239
+ # rate_limit_requests_per_minute: 60 # Uncomment to rate limit
240
+
241
+ # Graph persistence
242
+ graph_path: ./knowledge_graph.json
243
+ auto_save: true
244
+
245
+ # Extraction settings
246
+ extract_relations: true
247
+ chunk_size: 4000
248
+ chunk_overlap: 200
249
+ max_concurrent_extractions: 1
250
+ """
251
+
252
+
253
def generate_config_template(output_path: str = "./ingestion-config.yml") -> str:
    """Write the example YAML configuration template to a file.

    The file is opened in write mode, so an existing file at the same
    location is overwritten.

    Args:
        output_path: Where to write the template.

    Returns:
        The ``output_path`` that was written, unchanged.
    """
    with open(output_path, 'w') as template_file:
        template_file.write(EXAMPLE_CONFIG_YAML)
    return output_path