langroid 0.45.1__tar.gz → 0.45.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. {langroid-0.45.1 → langroid-0.45.3}/PKG-INFO +1 -1
  2. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/base.py +31 -8
  3. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/chat_agent.py +10 -6
  4. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/doc_chat_agent.py +3 -2
  5. {langroid-0.45.1 → langroid-0.45.3}/langroid/mytypes.py +2 -1
  6. {langroid-0.45.1 → langroid-0.45.3}/langroid/parsing/document_parser.py +10 -4
  7. {langroid-0.45.1 → langroid-0.45.3}/langroid/parsing/parser.py +3 -0
  8. {langroid-0.45.1 → langroid-0.45.3}/langroid/utils/output/citations.py +15 -5
  9. {langroid-0.45.1 → langroid-0.45.3}/pyproject.toml +1 -1
  10. {langroid-0.45.1 → langroid-0.45.3}/.gitignore +0 -0
  11. {langroid-0.45.1 → langroid-0.45.3}/LICENSE +0 -0
  12. {langroid-0.45.1 → langroid-0.45.3}/README.md +0 -0
  13. {langroid-0.45.1 → langroid-0.45.3}/langroid/__init__.py +0 -0
  14. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/__init__.py +0 -0
  15. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/batch.py +0 -0
  16. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/callbacks/__init__.py +0 -0
  17. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/callbacks/chainlit.py +0 -0
  18. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/chat_document.py +0 -0
  19. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/openai_assistant.py +0 -0
  20. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/__init__.py +0 -0
  21. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/arangodb/__init__.py +0 -0
  22. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/arangodb/arangodb_agent.py +0 -0
  23. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/arangodb/system_messages.py +0 -0
  24. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/arangodb/tools.py +0 -0
  25. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/arangodb/utils.py +0 -0
  26. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
  27. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/lance_rag/__init__.py +0 -0
  28. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
  29. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
  30. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
  31. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/lance_tools.py +0 -0
  32. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/neo4j/__init__.py +0 -0
  33. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
  34. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
  35. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/neo4j/system_messages.py +0 -0
  36. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/neo4j/tools.py +0 -0
  37. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/relevance_extractor_agent.py +0 -0
  38. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/retriever_agent.py +0 -0
  39. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/sql/__init__.py +0 -0
  40. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
  41. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/sql/utils/__init__.py +0 -0
  42. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
  43. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
  44. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/sql/utils/system_message.py +0 -0
  45. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/sql/utils/tools.py +0 -0
  46. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/special/table_chat_agent.py +0 -0
  47. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/task.py +0 -0
  48. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/tool_message.py +0 -0
  49. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/tools/__init__.py +0 -0
  50. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
  51. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/tools/exa_search_tool.py +0 -0
  52. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/tools/file_tools.py +0 -0
  53. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/tools/google_search_tool.py +0 -0
  54. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/tools/metaphor_search_tool.py +0 -0
  55. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/tools/orchestration.py +0 -0
  56. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/tools/recipient_tool.py +0 -0
  57. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/tools/retrieval_tool.py +0 -0
  58. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/tools/rewind_tool.py +0 -0
  59. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/tools/segment_extract_tool.py +0 -0
  60. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/tools/tavily_search_tool.py +0 -0
  61. {langroid-0.45.1 → langroid-0.45.3}/langroid/agent/xml_tool_message.py +0 -0
  62. {langroid-0.45.1 → langroid-0.45.3}/langroid/cachedb/__init__.py +0 -0
  63. {langroid-0.45.1 → langroid-0.45.3}/langroid/cachedb/base.py +0 -0
  64. {langroid-0.45.1 → langroid-0.45.3}/langroid/cachedb/momento_cachedb.py +0 -0
  65. {langroid-0.45.1 → langroid-0.45.3}/langroid/cachedb/redis_cachedb.py +0 -0
  66. {langroid-0.45.1 → langroid-0.45.3}/langroid/embedding_models/__init__.py +0 -0
  67. {langroid-0.45.1 → langroid-0.45.3}/langroid/embedding_models/base.py +0 -0
  68. {langroid-0.45.1 → langroid-0.45.3}/langroid/embedding_models/models.py +0 -0
  69. {langroid-0.45.1 → langroid-0.45.3}/langroid/embedding_models/protoc/__init__.py +0 -0
  70. {langroid-0.45.1 → langroid-0.45.3}/langroid/embedding_models/protoc/embeddings.proto +0 -0
  71. {langroid-0.45.1 → langroid-0.45.3}/langroid/embedding_models/protoc/embeddings_pb2.py +0 -0
  72. {langroid-0.45.1 → langroid-0.45.3}/langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -0
  73. {langroid-0.45.1 → langroid-0.45.3}/langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -0
  74. {langroid-0.45.1 → langroid-0.45.3}/langroid/embedding_models/remote_embeds.py +0 -0
  75. {langroid-0.45.1 → langroid-0.45.3}/langroid/exceptions.py +0 -0
  76. {langroid-0.45.1 → langroid-0.45.3}/langroid/language_models/__init__.py +0 -0
  77. {langroid-0.45.1 → langroid-0.45.3}/langroid/language_models/azure_openai.py +0 -0
  78. {langroid-0.45.1 → langroid-0.45.3}/langroid/language_models/base.py +0 -0
  79. {langroid-0.45.1 → langroid-0.45.3}/langroid/language_models/config.py +0 -0
  80. {langroid-0.45.1 → langroid-0.45.3}/langroid/language_models/mock_lm.py +0 -0
  81. {langroid-0.45.1 → langroid-0.45.3}/langroid/language_models/model_info.py +0 -0
  82. {langroid-0.45.1 → langroid-0.45.3}/langroid/language_models/openai_gpt.py +0 -0
  83. {langroid-0.45.1 → langroid-0.45.3}/langroid/language_models/prompt_formatter/__init__.py +0 -0
  84. {langroid-0.45.1 → langroid-0.45.3}/langroid/language_models/prompt_formatter/base.py +0 -0
  85. {langroid-0.45.1 → langroid-0.45.3}/langroid/language_models/prompt_formatter/hf_formatter.py +0 -0
  86. {langroid-0.45.1 → langroid-0.45.3}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
  87. {langroid-0.45.1 → langroid-0.45.3}/langroid/language_models/utils.py +0 -0
  88. {langroid-0.45.1 → langroid-0.45.3}/langroid/parsing/__init__.py +0 -0
  89. {langroid-0.45.1 → langroid-0.45.3}/langroid/parsing/agent_chats.py +0 -0
  90. {langroid-0.45.1 → langroid-0.45.3}/langroid/parsing/code_parser.py +0 -0
  91. {langroid-0.45.1 → langroid-0.45.3}/langroid/parsing/para_sentence_split.py +0 -0
  92. {langroid-0.45.1 → langroid-0.45.3}/langroid/parsing/parse_json.py +0 -0
  93. {langroid-0.45.1 → langroid-0.45.3}/langroid/parsing/pdf_utils.py +0 -0
  94. {langroid-0.45.1 → langroid-0.45.3}/langroid/parsing/repo_loader.py +0 -0
  95. {langroid-0.45.1 → langroid-0.45.3}/langroid/parsing/routing.py +0 -0
  96. {langroid-0.45.1 → langroid-0.45.3}/langroid/parsing/search.py +0 -0
  97. {langroid-0.45.1 → langroid-0.45.3}/langroid/parsing/spider.py +0 -0
  98. {langroid-0.45.1 → langroid-0.45.3}/langroid/parsing/table_loader.py +0 -0
  99. {langroid-0.45.1 → langroid-0.45.3}/langroid/parsing/url_loader.py +0 -0
  100. {langroid-0.45.1 → langroid-0.45.3}/langroid/parsing/urls.py +0 -0
  101. {langroid-0.45.1 → langroid-0.45.3}/langroid/parsing/utils.py +0 -0
  102. {langroid-0.45.1 → langroid-0.45.3}/langroid/parsing/web_search.py +0 -0
  103. {langroid-0.45.1 → langroid-0.45.3}/langroid/prompts/__init__.py +0 -0
  104. {langroid-0.45.1 → langroid-0.45.3}/langroid/prompts/dialog.py +0 -0
  105. {langroid-0.45.1 → langroid-0.45.3}/langroid/prompts/prompts_config.py +0 -0
  106. {langroid-0.45.1 → langroid-0.45.3}/langroid/prompts/templates.py +0 -0
  107. {langroid-0.45.1 → langroid-0.45.3}/langroid/py.typed +0 -0
  108. {langroid-0.45.1 → langroid-0.45.3}/langroid/pydantic_v1/__init__.py +0 -0
  109. {langroid-0.45.1 → langroid-0.45.3}/langroid/pydantic_v1/main.py +0 -0
  110. {langroid-0.45.1 → langroid-0.45.3}/langroid/utils/__init__.py +0 -0
  111. {langroid-0.45.1 → langroid-0.45.3}/langroid/utils/algorithms/__init__.py +0 -0
  112. {langroid-0.45.1 → langroid-0.45.3}/langroid/utils/algorithms/graph.py +0 -0
  113. {langroid-0.45.1 → langroid-0.45.3}/langroid/utils/configuration.py +0 -0
  114. {langroid-0.45.1 → langroid-0.45.3}/langroid/utils/constants.py +0 -0
  115. {langroid-0.45.1 → langroid-0.45.3}/langroid/utils/git_utils.py +0 -0
  116. {langroid-0.45.1 → langroid-0.45.3}/langroid/utils/globals.py +0 -0
  117. {langroid-0.45.1 → langroid-0.45.3}/langroid/utils/logging.py +0 -0
  118. {langroid-0.45.1 → langroid-0.45.3}/langroid/utils/object_registry.py +0 -0
  119. {langroid-0.45.1 → langroid-0.45.3}/langroid/utils/output/__init__.py +0 -0
  120. {langroid-0.45.1 → langroid-0.45.3}/langroid/utils/output/printing.py +0 -0
  121. {langroid-0.45.1 → langroid-0.45.3}/langroid/utils/output/status.py +0 -0
  122. {langroid-0.45.1 → langroid-0.45.3}/langroid/utils/pandas_utils.py +0 -0
  123. {langroid-0.45.1 → langroid-0.45.3}/langroid/utils/pydantic_utils.py +0 -0
  124. {langroid-0.45.1 → langroid-0.45.3}/langroid/utils/system.py +0 -0
  125. {langroid-0.45.1 → langroid-0.45.3}/langroid/utils/types.py +0 -0
  126. {langroid-0.45.1 → langroid-0.45.3}/langroid/vector_store/__init__.py +0 -0
  127. {langroid-0.45.1 → langroid-0.45.3}/langroid/vector_store/base.py +0 -0
  128. {langroid-0.45.1 → langroid-0.45.3}/langroid/vector_store/chromadb.py +0 -0
  129. {langroid-0.45.1 → langroid-0.45.3}/langroid/vector_store/lancedb.py +0 -0
  130. {langroid-0.45.1 → langroid-0.45.3}/langroid/vector_store/meilisearch.py +0 -0
  131. {langroid-0.45.1 → langroid-0.45.3}/langroid/vector_store/pineconedb.py +0 -0
  132. {langroid-0.45.1 → langroid-0.45.3}/langroid/vector_store/postgres.py +0 -0
  133. {langroid-0.45.1 → langroid-0.45.3}/langroid/vector_store/qdrantdb.py +0 -0
  134. {langroid-0.45.1 → langroid-0.45.3}/langroid/vector_store/weaviatedb.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: langroid
3
- Version: 0.45.1
3
+ Version: 0.45.3
4
4
  Summary: Harness LLMs with Multi-Agent Programming
5
5
  Author-email: Prasad Chalasani <pchalasani@gmail.com>
6
6
  License: MIT
@@ -90,6 +90,7 @@ class AgentConfig(BaseSettings):
90
90
  parsing: Optional[ParsingConfig] = ParsingConfig()
91
91
  prompts: Optional[PromptsConfig] = PromptsConfig()
92
92
  show_stats: bool = True # show token usage/cost stats?
93
+ hide_agent_response: bool = True # hide agent response?
93
94
  add_to_registry: bool = True # register agent in ObjectRegistry?
94
95
  respond_tools_only: bool = False # respond only to tool messages (not plain text)?
95
96
  # allow multiple tool messages in a single response?
@@ -460,6 +461,28 @@ class Agent(ABC):
460
461
  recipient=recipient,
461
462
  )
462
463
 
464
+ def render_agent_response(
465
+ self,
466
+ results: Optional[str | OrderedDict[str, str] | ChatDocument],
467
+ ) -> None:
468
+ """
469
+ Render the response from the agent, typically from tool-handling.
470
+ Args:
471
+ results: results from tool-handling, which may be a string,
472
+ a dict of tool results, or a ChatDocument.
473
+ """
474
+ if self.config.hide_agent_response or results is None:
475
+ return
476
+ if isinstance(results, str):
477
+ results_str = results
478
+ elif isinstance(results, ChatDocument):
479
+ results_str = results.content
480
+ elif isinstance(results, dict):
481
+ results_str = json.dumps(results, indent=2)
482
+ if not settings.quiet:
483
+ console.print(f"[red]{self.indent}", end="")
484
+ print(f"[red]Agent: {escape(results_str)}")
485
+
463
486
  def _agent_response_final(
464
487
  self,
465
488
  msg: Optional[str | ChatDocument],
@@ -477,8 +500,7 @@ class Agent(ABC):
477
500
  elif isinstance(results, dict):
478
501
  results_str = json.dumps(results, indent=2)
479
502
  if not settings.quiet:
480
- console.print(f"[red]{self.indent}", end="")
481
- print(f"[red]Agent: {escape(results_str)}")
503
+ self.render_agent_response(results)
482
504
  maybe_json = len(extract_top_level_json(results_str)) > 0
483
505
  self.callbacks.show_agent_response(
484
506
  content=results_str,
@@ -1341,8 +1363,7 @@ class Agent(ABC):
1341
1363
 
1342
1364
  has_orch = any(isinstance(t, ORCHESTRATION_TOOLS) for t in tools)
1343
1365
  if has_orch and len(tools) > 1:
1344
- err_str = "ERROR: Use ONE tool at a time!"
1345
- return [err_str for _ in tools]
1366
+ return ["ERROR: Use ONE tool at a time!"] * len(tools)
1346
1367
 
1347
1368
  return []
1348
1369
 
@@ -1477,8 +1498,6 @@ class Agent(ABC):
1477
1498
  # as a response to the tool message even though the tool was not intended
1478
1499
  # for this agent.
1479
1500
  return None
1480
- if len(tools) > 1 and not self.config.allow_multiple_tools:
1481
- return self.to_ChatDocument("ERROR: Use ONE tool at a time!")
1482
1501
  if len(tools) == 0:
1483
1502
  fallback_result = self.handle_message_fallback(msg)
1484
1503
  if fallback_result is None:
@@ -1487,10 +1506,14 @@ class Agent(ABC):
1487
1506
  fallback_result,
1488
1507
  chat_doc=msg if isinstance(msg, ChatDocument) else None,
1489
1508
  )
1490
- chat_doc = msg if isinstance(msg, ChatDocument) else None
1491
1509
 
1492
- results = self._get_multiple_orch_tool_errs(tools)
1510
+ results: List[str | ChatDocument | None] = []
1511
+ if len(tools) > 1 and not self.config.allow_multiple_tools:
1512
+ results = ["ERROR: Use ONE tool at a time!"] * len(tools)
1513
+ if not results:
1514
+ results = self._get_multiple_orch_tool_errs(tools)
1493
1515
  if not results:
1516
+ chat_doc = msg if isinstance(msg, ChatDocument) else None
1494
1517
  results = [self.handle_tool_message(t, chat_doc=chat_doc) for t in tools]
1495
1518
  # if there's a solitary ChatDocument|str result, return it as is
1496
1519
  if len(results) == 1 and isinstance(results[0], (str, ChatDocument)):
@@ -85,6 +85,8 @@ class ChatAgentConfig(AgentConfig):
85
85
  enabled when such tool calls are not desired.
86
86
  output_format_include_defaults: Whether to include fields with default arguments
87
87
  in the output schema
88
+ full_citations: Whether to show source reference citation + content for each
89
+ citation, or just the main reference citation.
88
90
  """
89
91
 
90
92
  system_message: str = "You are a helpful assistant."
@@ -101,6 +103,7 @@ class ChatAgentConfig(AgentConfig):
101
103
  instructions_output_format: bool = True
102
104
  output_format_include_defaults: bool = True
103
105
  use_tools_on_output_format: bool = True
106
+ full_citations: bool = True # show source + content for each citation?
104
107
 
105
108
  def _set_fn_or_tools(self, fn_available: bool) -> None:
106
109
  """
@@ -1854,14 +1857,15 @@ class ChatAgent(Agent):
1854
1857
  # we won't have citations yet, so we're done
1855
1858
  return
1856
1859
  if response.metadata.has_citation:
1860
+ citation = (
1861
+ response.metadata.source_content
1862
+ if self.config.full_citations
1863
+ else response.metadata.source
1864
+ )
1857
1865
  if not settings.quiet:
1858
- print(
1859
- "[grey37]SOURCES:\n"
1860
- + escape(response.metadata.source)
1861
- + "[/grey37]"
1862
- )
1866
+ print("[grey37]SOURCES:\n" + escape(citation) + "[/grey37]")
1863
1867
  self.callbacks.show_llm_response(
1864
- content=str(response.metadata.source),
1868
+ content=str(citation),
1865
1869
  is_tool=False,
1866
1870
  cached=False,
1867
1871
  language="text",
@@ -863,12 +863,13 @@ class DocChatAgent(ChatAgent):
863
863
  # extract references like [^2], [^3], etc. from the final answer
864
864
  citations = extract_markdown_references(final_answer)
865
865
  # format the cited references as a string suitable for markdown footnote
866
- citations_str = format_cited_references(citations, passages)
866
+ full_citations_str, citations_str = format_cited_references(citations, passages)
867
867
 
868
868
  return ChatDocument(
869
869
  content=final_answer, # does not contain citations
870
870
  metadata=ChatDocMetaData(
871
- source=citations_str, # only the citations
871
+ source=citations_str, # only the reference headers
872
+ source_content=full_citations_str, # reference + content
872
873
  sender=Entity.LLM,
873
874
  has_citation=len(citations) > 0,
874
875
  cached=getattr(answer_doc.metadata, "cached", False),
@@ -43,7 +43,8 @@ class Entity(str, Enum):
43
43
  class DocMetaData(BaseModel):
44
44
  """Metadata for a document."""
45
45
 
46
- source: str = "context"
46
+ source: str = "context" # just reference
47
+ source_content: str = "context" # reference and content
47
48
  is_chunk: bool = False # if it is a chunk, don't split
48
49
  id: str = Field(default_factory=lambda: str(uuid4()))
49
50
  window_ids: List[str] = [] # for RAG: ids of chunks around this one
@@ -404,8 +404,8 @@ class DocumentParser(Parser):
404
404
  # that it needs to be combined with the next chunk.
405
405
  while len(split) > self.config.chunk_size:
406
406
  # pretty formatting of pages (e.g. 1-3, 4, 5-7)
407
- p_0 = int(pages[0])
408
- p_n = int(pages[-1])
407
+ p_0 = int(pages[0]) - self.config.page_number_offset
408
+ p_n = int(pages[-1]) - self.config.page_number_offset
409
409
  page_str = f"pages {p_0}-{p_n}" if p_0 != p_n else f"page {p_0}"
410
410
  text = self.tokenizer.decode(split[: self.config.chunk_size])
411
411
  docs.append(
@@ -426,13 +426,15 @@ class DocumentParser(Parser):
426
426
  # since it's already included in the prior chunk;
427
427
  # the only exception is if there have been no chunks so far.
428
428
  if len(split) > self.config.overlap or n_chunks == 0:
429
- pg = "-".join([pages[0], pages[-1]])
429
+ p_0 = int(pages[0]) - self.config.page_number_offset
430
+ p_n = int(pages[-1]) - self.config.page_number_offset
431
+ page_str = f"pages {p_0}-{p_n}" if p_0 != p_n else f"page {p_0}"
430
432
  text = self.tokenizer.decode(split[: self.config.chunk_size])
431
433
  docs.append(
432
434
  Document(
433
435
  content=text,
434
436
  metadata=DocMetaData(
435
- source=f"{self.source} pages {pg}",
437
+ source=f"{self.source} {page_str}",
436
438
  is_chunk=True,
437
439
  id=common_id,
438
440
  ),
@@ -1361,6 +1363,10 @@ class GeminiPdfParser(DocumentParser):
1361
1363
 
1362
1364
 
1363
1365
  class MarkerPdfParser(DocumentParser):
1366
+ """
1367
+ Parse PDF files using the `marker` library: https://github.com/VikParuchuri/marker
1368
+ """
1369
+
1364
1370
  DEFAULT_CONFIG = {"paginate_output": True, "output_format": "markdown"}
1365
1371
 
1366
1372
  def __init__(self, source: Union[str, bytes], config: ParsingConfig):
@@ -103,6 +103,9 @@ class ParsingConfig(BaseSettings):
103
103
  chunk_size: int = 200 # aim for this many tokens per chunk
104
104
  overlap: int = 50 # overlap between chunks
105
105
  max_chunks: int = 10_000
106
+ # offset to subtract from page numbers:
107
+ # e.g. if physical page 12 is displayed as page 1, set page_number_offset = 11
108
+ page_number_offset: int = 0
106
109
  # aim to have at least this many chars per chunk when truncating due to punctuation
107
110
  min_chunk_chars: int = 350
108
111
  discard_chunk_chars: int = 5 # discard chunks with fewer than this many chars
@@ -1,4 +1,4 @@
1
- from typing import List
1
+ from typing import List, Tuple
2
2
 
3
3
  from langroid.mytypes import Document
4
4
 
@@ -66,7 +66,9 @@ def format_footnote_text(content: str, width: int = 0) -> str:
66
66
  return "\n".join(output_lines)
67
67
 
68
68
 
69
- def format_cited_references(citations: List[int], passages: list[Document]) -> str:
69
+ def format_cited_references(
70
+ citations: List[int], passages: list[Document]
71
+ ) -> Tuple[str, str]:
70
72
  """
71
73
  Given a list of (integer) citations, and a list of passages, return a string
72
74
  that can be added as a footer to the main text, to show sources cited.
@@ -76,16 +78,24 @@ def format_cited_references(citations: List[int], passages: list[Document]) -> s
76
78
  passages (list[Document]): list of passages (Document objects)
77
79
 
78
80
  Returns:
79
- str: formatted string of citations for footnote in markdown
81
+ str: formatted string of FULL citations (i.e. reference AND content)
82
+ for footnote in markdown;
83
+ str: formatted string of BRIEF citations (i.e. reference only)
84
+ for footnote in markdown.
80
85
  """
81
86
  citations_str = ""
87
+ full_citations_str = ""
82
88
  if len(citations) > 0:
83
89
  # append [i] source, content for each citation
84
- citations_str = "\n".join(
90
+ full_citations_str = "\n".join(
85
91
  [
86
92
  f"[^{c}] {passages[c-1].metadata.source}"
87
93
  f"\n{format_footnote_text(passages[c-1].content)}"
88
94
  for c in citations
89
95
  ]
90
96
  )
91
- return citations_str
97
+ # append [i] source for each citation
98
+ citations_str = "\n".join(
99
+ [f"[^{c}] {passages[c-1].metadata.source}" for c in citations]
100
+ )
101
+ return full_citations_str, citations_str
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "langroid"
3
- version = "0.45.1"
3
+ version = "0.45.3"
4
4
  authors = [
5
5
  {name = "Prasad Chalasani", email = "pchalasani@gmail.com"},
6
6
  ]
File without changes
File without changes
File without changes
File without changes