@chimerai/cli 1.2.7 → 1.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -883,6 +883,17 @@ ANTHROPIC_API_KEY=
883
883
  AZURE_OPENAI_API_KEY=
884
884
  AZURE_OPENAI_ENDPOINT=
885
885
 
886
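+ # Ollama (local models). While this is set, any model name without a provider prefix (including the OpenAI defaults below) is routed to Ollama as ollama/<model>; leave empty to disable auto-prefix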
887
+ OLLAMA_BASE_URL=http://localhost:11434
888
+
889
+ # RAG / Embedding settings
890
+ # Model used for generating embeddings (e.g. text-embedding-ada-002, nomic-embed-text)
891
+ DEFAULT_EMBEDDING_MODEL=text-embedding-ada-002
892
+ # Fallback dimension, used only when startup auto-detection of the embedding size fails; should match the embedding model (OpenAI ada-002=1536, nomic-embed-text=768)
893
+ EMBEDDING_DIMENSION=1536
894
+ # Default chat model for RAG responses (e.g. gpt-3.5-turbo, llama3.2)
895
+ DEFAULT_CHAT_MODEL=gpt-3.5-turbo
896
+
886
897
  `;
887
898
  if (features.includes('billing')) {
888
899
  envContent += `# Stripe
@@ -1 +1 @@
1
- {"version":3,"file":"ai-service.d.ts","sourceRoot":"","sources":["../../src/templates/ai-service.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AASH,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,QAAQ;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB;AAMD,eAAO,MAAM,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,QAAQ,CAmD9C,CAAC;AAMF,wBAAgB,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,iBAAiB,GAAG,IAAI,CAQ1E;AAED,wBAAgB,eAAe,CAAC,SAAS,EAAE,MAAM,EAAE,QAAQ,EAAE,iBAAiB,GAAG,IAAI,CAIpF;AAMD,wBAAgB,sBAAsB,IAAI,MAAM,CAwM/C;AAMD,wBAAgB,oBAAoB,IAAI,MAAM,CAG7C;AAMD,wBAAgB,mBAAmB,IAAI,MAAM,CAqN5C;AAMD,wBAAgB,oBAAoB,IAAI,MAAM,CAwG7C;AAMD,wBAAgB,yBAAyB,IAAI,MAAM,CAiHlD;AAMD,wBAAgB,wBAAwB,IAAI,MAAM,CAyGjD;AAMD,wBAAgB,qBAAqB,IAAI,MAAM,CA6I9C;AAED,wBAAgB,kBAAkB,IAAI,MAAM,CAkJ3C;AAMD,wBAAgB,mBAAmB,IAAI,MAAM,CAuN5C;AAOD,wBAAgB,yBAAyB,IAAI,MAAM,CAyLlD;AAMD;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,KAAK,GAAE,MAAM,EAAO,GAAG,MAAM,CAkKrF;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,MAAM,CA4CjE;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,MAAM,CA6OjE;AAED;;GAEG;AACH,wBAAgB,6BAA6B,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,KAAK,GAAE,MAAM,EAAO,GAAG,MAAM,CAsD7F;AAED;;GAEG;AACH,wBAAgB,2BAA2B,IAAI,MAAM,CAwBpD;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,KAAK,GAAE,MAAM,EAAO,GAAG,MAAM,CAuEvF;AAED;;GAEG;AACH,wBAAgB,8BAA8B,IAAI,MAAM,CAoBvD"}
1
+ {"version":3,"file":"ai-service.d.ts","sourceRoot":"","sources":["../../src/templates/ai-service.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AASH,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,QAAQ;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB;AAMD,eAAO,MAAM,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,QAAQ,CAmD9C,CAAC;AAMF,wBAAgB,cAAc,CAAC,SAAS,EAAE,MAAM,GAAG,iBAAiB,GAAG,IAAI,CAQ1E;AAED,wBAAgB,eAAe,CAAC,SAAS,EAAE,MAAM,EAAE,QAAQ,EAAE,iBAAiB,GAAG,IAAI,CAIpF;AAMD,wBAAgB,sBAAsB,IAAI,MAAM,CAwM/C;AAMD,wBAAgB,oBAAoB,IAAI,MAAM,CAG7C;AAMD,wBAAgB,mBAAmB,IAAI,MAAM,CAqN5C;AAMD,wBAAgB,oBAAoB,IAAI,MAAM,CAwG7C;AAMD,wBAAgB,yBAAyB,IAAI,MAAM,CAiHlD;AAMD,wBAAgB,wBAAwB,IAAI,MAAM,CAyGjD;AAMD,wBAAgB,qBAAqB,IAAI,MAAM,CA6I9C;AAED,wBAAgB,kBAAkB,IAAI,MAAM,CAuJ3C;AAMD,wBAAgB,mBAAmB,IAAI,MAAM,CA4Q5C;AAOD,wBAAgB,yBAAyB,IAAI,MAAM,CAyLlD;AAMD;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,KAAK,GAAE,MAAM,EAAO,GAAG,MAAM,CAkMrF;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,MAAM,CAsFjE;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,MAAM,CA6OjE;AAED;;GAEG;AACH,wBAAgB,6BAA6B,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,KAAK,GAAE,MAAM,EAAO,GAAG,MAAM,CAsD7F;AAED;;GAEG;AACH,wBAAgB,2BAA2B,IAAI,MAAM,CAwBpD;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,KAAK,GAAE,MAAM,EAAO,GAAG,MAAM,CAuEvF;AAED;;GAEG;AACH,wBAAgB,8BAA8B,IAAI,MAAM,CAoBvD"}
@@ -1026,7 +1026,12 @@ from typing import List, Optional
1026
1026
  import structlog
1027
1027
 
1028
1028
  from config import settings
1029
- from services.vector_store import vector_store, FAISS_AVAILABLE
1029
+ from services import vector_store as _vs_module
1030
+ from services.vector_store import FAISS_AVAILABLE
1031
+
1032
+
1033
def _get_vector_store():
    """Return the vector store currently bound on the services.vector_store module.

    The attribute is read on every call instead of being imported by name,
    so a store assigned to the module global after this module was imported
    (for example by init_vector_store()) is the one callers receive.
    May return None when no store has been initialized.
    """
    return _vs_module.vector_store
1030
1035
  from services.chat_service import chat_service
1031
1036
  from models import ChatCompletionRequest, ChatMessage, MessageRole
1032
1037
 
@@ -1037,7 +1042,7 @@ class RAGService:
1037
1042
  """Service for Retrieval Augmented Generation (RAG)."""
1038
1043
 
1039
1044
  def _check_availability(self):
1040
- if not FAISS_AVAILABLE or vector_store is None:
1045
+ if not FAISS_AVAILABLE or _get_vector_store() is None:
1041
1046
  raise RuntimeError(
1042
1047
  "FAISS vector store is not available. "
1043
1048
  "Install faiss-cpu: pip install faiss-cpu numpy"
@@ -1050,13 +1055,13 @@ class RAGService:
1050
1055
  ) -> dict:
1051
1056
  try:
1052
1057
  self._check_availability()
1053
- ids = await vector_store.add_texts(documents, metadatas)
1058
+ ids = await _get_vector_store().add_texts(documents, metadatas)
1054
1059
  logger.info("documents_added", count=len(ids))
1055
1060
  return {
1056
1061
  "status": "success",
1057
1062
  "added": len(ids),
1058
1063
  "ids": ids,
1059
- "total_vectors": vector_store.get_stats()["total_vectors"],
1064
+ "total_vectors": _get_vector_store().get_stats()["total_vectors"],
1060
1065
  }
1061
1066
  except Exception as e:
1062
1067
  logger.error("add_documents_failed", error=str(e))
@@ -1065,7 +1070,7 @@ class RAGService:
1065
1070
  async def search_documents(self, query: str, k: int = 4) -> List[dict]:
1066
1071
  try:
1067
1072
  self._check_availability()
1068
- results = await vector_store.similarity_search(query, k=k)
1073
+ results = await _get_vector_store().similarity_search(query, k=k)
1069
1074
  logger.info("documents_searched", query_length=len(query), results=len(results))
1070
1075
  return results
1071
1076
  except Exception as e:
@@ -1085,7 +1090,7 @@ class RAGService:
1085
1090
  try:
1086
1091
  self._check_availability()
1087
1092
 
1088
- relevant_docs = await vector_store.similarity_search(query, k=k)
1093
+ relevant_docs = await _get_vector_store().similarity_search(query, k=k)
1089
1094
 
1090
1095
  context_parts = []
1091
1096
  for i, doc in enumerate(relevant_docs, 1):
@@ -1104,7 +1109,7 @@ Context:
1104
1109
  ChatMessage(role=MessageRole.USER, content=query),
1105
1110
  ]
1106
1111
 
1107
- model = model or settings.default_chat_model
1112
+ model = model or settings.resolved_chat_model
1108
1113
 
1109
1114
  chat_request = ChatCompletionRequest(
1110
1115
  model=model,
@@ -1141,22 +1146,22 @@ Context:
1141
1146
 
1142
1147
  def get_stats(self) -> dict:
1143
1148
  self._check_availability()
1144
- return vector_store.get_stats()
1149
+ return _get_vector_store().get_stats()
1145
1150
 
1146
1151
  def clear_store(self):
1147
1152
  self._check_availability()
1148
- vector_store.clear()
1153
+ _get_vector_store().clear()
1149
1154
  return {"status": "success", "message": "Vector store cleared"}
1150
1155
 
1151
1156
  async def delete_documents(self, document_ids: List[int]) -> dict:
1152
1157
  try:
1153
1158
  self._check_availability()
1154
- deleted = await vector_store.delete_by_ids(document_ids)
1159
+ deleted = await _get_vector_store().delete_by_ids(document_ids)
1155
1160
  logger.info("documents_deleted", requested=len(document_ids), deleted=deleted)
1156
1161
  return {
1157
1162
  "status": "success",
1158
1163
  "deleted": deleted,
1159
- "remaining_vectors": vector_store.get_stats()["total_vectors"],
1164
+ "remaining_vectors": _get_vector_store().get_stats()["total_vectors"],
1160
1165
  }
1161
1166
  except Exception as e:
1162
1167
  logger.error("delete_documents_failed", error=str(e))
@@ -1183,6 +1188,7 @@ except Exception as e:
1183
1188
  import pickle
1184
1189
  import os
1185
1190
  from typing import List, Dict, Any, Optional
1191
+ import httpx
1186
1192
  import litellm
1187
1193
  from langchain_text_splitters import RecursiveCharacterTextSplitter
1188
1194
  from config import settings
@@ -1345,8 +1351,32 @@ class VectorStore:
1345
1351
 
1346
1352
  async def _generate_embeddings(self, texts: List[str]) -> List[List[float]]:
1347
1353
  try:
1354
+ model = settings.resolved_embedding_model
1355
+ is_ollama = model.startswith("ollama/") or model.startswith("ollama_chat/")
1356
+
1357
+ if is_ollama:
1358
+ # Use Ollama's /api/embed endpoint directly (supports array input)
1359
+ model_name = model.split("/", 1)[1] # strip "ollama/" prefix
1360
+ base_url = settings.ollama_base_url.rstrip("/")
1361
+ async with httpx.AsyncClient(timeout=60.0) as client:
1362
+ resp = await client.post(
1363
+ f"{base_url}/api/embed",
1364
+ json={"model": model_name, "input": texts},
1365
+ )
1366
+ resp.raise_for_status()
1367
+ data = resp.json()
1368
+ embeddings = data.get("embeddings") or data.get("embedding")
1369
+ if embeddings is None:
1370
+ raise ValueError(f"Unexpected Ollama embed response: {data}")
1371
+ # /api/embed always returns a list of vectors
1372
+ if isinstance(embeddings[0], (int, float)):
1373
+ embeddings = [embeddings] # single vector → wrap
1374
+ logger.info("embeddings_generated", count=len(embeddings), model=model)
1375
+ return embeddings
1376
+
1377
+ # Non-Ollama: use LiteLLM
1348
1378
  response = await litellm.aembedding(
1349
- model=settings.default_embedding_model,
1379
+ model=model,
1350
1380
  input=texts,
1351
1381
  )
1352
1382
  embeddings = [
@@ -1355,7 +1385,7 @@ class VectorStore:
1355
1385
  ]
1356
1386
  logger.info("embeddings_generated",
1357
1387
  count=len(embeddings),
1358
- model=settings.default_embedding_model)
1388
+ model=settings.resolved_embedding_model)
1359
1389
  return embeddings
1360
1390
  except Exception as e:
1361
1391
  logger.error("embedding_generation_failed", error=str(e))
@@ -1375,14 +1405,42 @@ class VectorStore:
1375
1405
  logger.info("faiss_index_cleared")
1376
1406
 
1377
1407
 
1378
- if FAISS_AVAILABLE:
1408
+ # Initialized lazily at startup via init_vector_store()
1409
+ vector_store: Optional["VectorStore"] = None
1410
+
1411
+
1412
def init_vector_store(dimension: int) -> Optional["VectorStore"]:
    """Initialize (or re-initialize) the global vector store with the given dimension.

    If the index held by the freshly built VectorStore reports a different
    dimension (index.d), a new index is created and saved, so no manual
    file deletion is required.

    Args:
        dimension: Dimension passed to VectorStore and compared against the
            dimension of the index it holds.

    Returns:
        The new VectorStore, or None when FAISS is unavailable or the
        initialization raised. The module-level vector_store is always left
        equal to the returned value.
    """
    global vector_store

    if not FAISS_AVAILABLE:
        logger.warning("faiss_not_available_skipping_vector_store")
        vector_store = None
        return None

    try:
        instance = VectorStore(dimension=dimension)

        # Dimension mismatch: existing index was built with a different model
        index = instance.index
        if index is not None and index.d != dimension:
            logger.warning(
                "embedding_dimension_mismatch",
                index_dimension=index.d,
                model_dimension=dimension,
                action="clearing_index",
            )
            instance._create_new_index()
            instance.save()
    except Exception as e:
        logger.error("vector_store_init_failed", error=str(e))
        # Do not keep serving a store from an earlier, now superseded
        # initialization: reset the global so it agrees with the None result.
        vector_store = None
        return None

    vector_store = instance
    logger.info("vector_store_initialized", dimension=dimension)
    return vector_store
1386
1444
  `;
1387
1445
  }
1388
1446
  // ============================================================================
@@ -1593,6 +1651,7 @@ function generateAiServiceMain(modules, tools = []) {
1593
1651
  routers.push('app.include_router(chat_router)');
1594
1652
  }
1595
1653
  if (hasRag) {
1654
+ imports.push('from services.vector_store import init_vector_store');
1596
1655
  imports.push('from routes.rag_routes import router as rag_router');
1597
1656
  routers.push('app.include_router(rag_router)');
1598
1657
  }
@@ -1657,7 +1716,38 @@ async def lifespan(app: FastAPI):
1657
1716
  except Exception as exc:
1658
1717
  logger.warning("provider_client_error", error=str(exc))
1659
1718
 
1660
- logger.info("ai_service_started", provider_mode=provider_mode)
1719
+ # --- Auto-detect embedding dimension (RAG) ---
1720
+ detected_dim = settings.embedding_dimension # fallback from .env / default
1721
+ model = settings.resolved_embedding_model
1722
+ embed_kwargs: dict = {}
1723
+ if model.startswith("ollama/") or model.startswith("ollama_chat/"):
1724
+ embed_kwargs["api_base"] = settings.ollama_base_url
1725
+
1726
+ try:
1727
+ is_ollama = model.startswith("ollama/") or model.startswith("ollama_chat/")
1728
+ if is_ollama:
1729
+ import httpx
1730
+ model_name = model.split("/", 1)[1]
1731
+ base_url = settings.ollama_base_url.rstrip("/")
1732
+ async with httpx.AsyncClient(timeout=30.0) as hx:
1733
+ resp = await hx.post(f"{base_url}/api/embed", json={"model": model_name, "input": ["dimension probe"]})
1734
+ resp.raise_for_status()
1735
+ data = resp.json()
1736
+ vec = (data.get("embeddings") or [data.get("embedding")])[0]
1737
+ detected_dim = len(vec)
1738
+ else:
1739
+ import litellm
1740
+ test_resp = await litellm.aembedding(model=model, input=["dimension probe"], **embed_kwargs)
1741
+ detected_dim = len(test_resp.data[0]["embedding"] if isinstance(test_resp.data[0], dict) else test_resp.data[0].embedding)
1742
+ logger.info("embedding_dimension_detected", model=model, dimension=detected_dim)
1743
+ except Exception as exc:
1744
+ logger.warning("embedding_dimension_detection_failed", model=model, error=str(exc),
1745
+ fallback_dimension=detected_dim)
1746
+
1747
+ settings.embedding_dimension = detected_dim
1748
+ init_vector_store(detected_dim)
1749
+
1750
+ logger.info("ai_service_started", provider_mode=provider_mode, embedding_model=model, embedding_dimension=detected_dim)
1661
1751
  yield
1662
1752
 
1663
1753
  await provider_client.close()
@@ -1751,13 +1841,36 @@ function generateAiServiceConfig(modules) {
1751
1841
  # RAG Settings
1752
1842
  default_embedding_model: str = "text-embedding-ada-002"
1753
1843
  embedding_dimension: int = 1536
1844
+ `
1845
+ : '';
1846
+ const ragProperties = hasRag
1847
+ ? `
1848
@property
def resolved_embedding_model(self) -> str:
    """Return the embedding model with provider prefix.

    A model that already names its provider (for example 'openai/...' or
    'gemini/...') is returned unchanged. If ollama_base_url is non-empty
    and the model has no recognized provider prefix (e.g. 'nomic-embed-text'
    or 'nomic-embed-text:latest'), 'ollama/' is prepended so LiteLLM/httpx
    can route correctly. With an empty ollama_base_url the name is returned
    as configured.
    """
    model = self.default_embedding_model
    # Prefixes that mark an explicitly chosen provider and must never be
    # wrapped in "ollama/". Covers common LiteLLM providers; not exhaustive.
    known_prefixes = (
        "ollama/", "ollama_chat/", "openai/", "anthropic/",
        "azure/", "cohere/", "huggingface/", "together_ai/",
        "azure_ai/", "bedrock/", "deepseek/", "fireworks_ai/",
        "gemini/", "groq/", "mistral/", "openrouter/",
        "perplexity/", "replicate/", "sagemaker/", "vertex_ai/",
        "voyage/", "xai/",
    )
    if self.ollama_base_url and not model.startswith(known_prefixes):
        return f"ollama/{model}"
    return model
1754
1863
  `
1755
1864
  : '';
1756
1865
  return `"""ChimerAI AI Service Configuration. Auto-generated by ChimerAI CLI."""
1757
1866
 
1867
+ import os
1758
1868
  from pydantic_settings import BaseSettings
1759
1869
  from typing import Optional
1760
1870
 
1871
+ # Look for .env in services/ai/ first, then fall back to project root
1872
+ _env_file = ".env" if os.path.exists(".env") else "../../.env"
1873
+
1761
1874
 
1762
1875
  class Settings(BaseSettings):
1763
1876
  # Service
@@ -1772,6 +1885,9 @@ class Settings(BaseSettings):
1772
1885
  # API Keys (Fallback)
1773
1886
  openai_api_key: Optional[str] = None
1774
1887
 
1888
+ # Ollama (local, no API key needed)
1889
+ ollama_base_url: str = "http://localhost:11434"
1890
+
1775
1891
  # Caching
1776
1892
  redis_url: str = "redis://localhost:6379"
1777
1893
  redis_enabled: bool = False
@@ -1779,8 +1895,23 @@ ${ragFields}
1779
1895
  # Default Model
1780
1896
  default_chat_model: str = "gpt-3.5-turbo"
1781
1897
 
1898
@property
def resolved_chat_model(self) -> str:
    """Return the chat model with provider prefix.

    A model that already names its provider (for example 'openai/...' or
    'gemini/...') is returned unchanged. If ollama_base_url is non-empty
    and the model has no recognized provider prefix, 'ollama/' is
    prepended. With an empty ollama_base_url the name is returned as
    configured.
    """
    model = self.default_chat_model
    # Prefixes that mark an explicitly chosen provider and must never be
    # wrapped in "ollama/". Covers common LiteLLM providers; not exhaustive.
    known_prefixes = (
        "ollama/", "ollama_chat/", "openai/", "anthropic/",
        "azure/", "cohere/", "huggingface/", "together_ai/",
        "azure_ai/", "bedrock/", "deepseek/", "fireworks_ai/",
        "gemini/", "groq/", "mistral/", "openrouter/",
        "perplexity/", "replicate/", "sagemaker/", "vertex_ai/",
        "voyage/", "xai/",
    )
    if self.ollama_base_url and not model.startswith(known_prefixes):
        return f"ollama/{model}"
    return model
1912
+ ${ragProperties}
1782
1913
  class Config:
1783
- env_file = ".env"
1914
+ env_file = _env_file
1784
1915
  extra = "ignore"
1785
1916
 
1786
1917
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@chimerai/cli",
3
- "version": "1.2.7",
3
+ "version": "1.2.8",
4
4
  "description": "CLI wizard for ChimerAI starter kit — scaffold auth, RBAC, AI chat, billing and more into any Next.js project",
5
5
  "main": "./dist/index.js",
6
6
  "bin": {