iflow-mcp_mcp-bibliotheque_nationale_de_france 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bnf_api/__init__.py +19 -0
- bnf_api/api.py +148 -0
- bnf_api/config.py +23 -0
- bnf_api/search.py +193 -0
- bnf_api/sequential_reporting.py +683 -0
- iflow_mcp_mcp_bibliotheque_nationale_de_france-0.1.0.dist-info/METADATA +8 -0
- iflow_mcp_mcp_bibliotheque_nationale_de_france-0.1.0.dist-info/RECORD +10 -0
- iflow_mcp_mcp_bibliotheque_nationale_de_france-0.1.0.dist-info/WHEEL +5 -0
- iflow_mcp_mcp_bibliotheque_nationale_de_france-0.1.0.dist-info/entry_points.txt +2 -0
- iflow_mcp_mcp_bibliotheque_nationale_de_france-0.1.0.dist-info/top_level.txt +1 -0
bnf_api/__init__.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Gallica BnF API Package
|
|
3
|
+
----------------------
|
|
4
|
+
This package provides tools to search and retrieve information from the Gallica digital library
|
|
5
|
+
of the Bibliothèque nationale de France (BnF) using their SRU API.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .api import GallicaAPI
|
|
9
|
+
from .search import SearchAPI
|
|
10
|
+
from .config import DEFAULT_MAX_RECORDS, DEFAULT_START_RECORD, BNF_SRU_URL, DOCUMENT_TYPES
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
'GallicaAPI',
|
|
14
|
+
'SearchAPI',
|
|
15
|
+
'DEFAULT_MAX_RECORDS',
|
|
16
|
+
'DEFAULT_START_RECORD',
|
|
17
|
+
'BNF_SRU_URL',
|
|
18
|
+
'DOCUMENT_TYPES'
|
|
19
|
+
]
|
bnf_api/api.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Gallica BnF API Client
|
|
3
|
+
---------------------
|
|
4
|
+
Client for the Gallica BnF SRU API.
|
|
5
|
+
Provides methods to search for documents and retrieve metadata.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import requests
|
|
10
|
+
import xml.etree.ElementTree as ET
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from typing import Dict, Any, List, Optional
|
|
13
|
+
|
|
14
|
+
# Set up logging
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
# Constants
|
|
18
|
+
DEFAULT_MAX_RECORDS = 10
DEFAULT_START_RECORD = 1
BNF_SRU_URL = "https://gallica.bnf.fr/SRU"


class GallicaAPI:
    """
    Client for the Gallica BnF SRU API.

    Sends ``searchRetrieve`` requests to the SRU endpoint and converts the
    Dublin Core records found in the XML response into plain dictionaries.
    """

    # XML namespaces used to locate SRU and Dublin Core elements.
    _NAMESPACES = {
        'srw': 'http://www.loc.gov/zing/srw/',
        'dc': 'http://purl.org/dc/elements/1.1/',
        'oai_dc': 'http://www.openarchives.org/OAI/2.0/oai_dc/'
    }

    # Dublin Core fields copied from each record when present.
    _DC_FIELDS = (
        'title', 'creator', 'contributor', 'publisher', 'date',
        'description', 'type', 'format', 'identifier', 'source',
        'language', 'relation', 'coverage', 'rights', 'subject'
    )

    # Seconds to wait for the server; without a timeout requests can block forever.
    _REQUEST_TIMEOUT = 30

    def __init__(self):
        """Initialize the Gallica API client."""
        self.base_url = BNF_SRU_URL
        logger.info("Gallica API client initialized")

    def search(self,
               query: str,
               start_record: int = DEFAULT_START_RECORD,
               max_records: int = DEFAULT_MAX_RECORDS) -> Dict[str, Any]:
        """
        Search for documents in the Gallica digital library.

        Args:
            query: Search query in CQL format
            start_record: Starting record number for pagination
            max_records: Maximum number of records to return

        Returns:
            On success, a dictionary with a "metadata" entry (query, total
            record count as reported by the server, number of records in this
            page, retrieval timestamp) and a "records" list. On failure, a
            dictionary with an "error" message and the "query" (plus the
            request "parameters" for HTTP-level failures). No exception is
            propagated to the caller.
        """
        params = {
            'version': '1.2',
            'operation': 'searchRetrieve',
            'query': query,
            'startRecord': start_record,
            'maximumRecords': max_records
        }

        try:
            response = requests.get(
                self.base_url, params=params, timeout=self._REQUEST_TIMEOUT
            )
            response.raise_for_status()
            # Parse the raw bytes so the XML parser honours the document's own
            # encoding declaration instead of the HTTP client's charset guess.
            return self._parse_response(response.content, query)

        except requests.exceptions.RequestException as e:
            logger.error("Error during Gallica API request: %s", e)
            return {
                "error": str(e),
                "query": query,
                "parameters": params
            }
        except ET.ParseError as e:
            logger.error("Error parsing XML response: %s", e)
            return {
                "error": f"XML parsing error: {str(e)}",
                "query": query
            }
        except Exception as e:
            logger.error("Unexpected error: %s", e)
            return {
                "error": str(e),
                "query": query
            }

    @classmethod
    def _parse_response(cls, xml_data, query: str) -> Dict[str, Any]:
        """
        Convert an SRU searchRetrieve XML document into a result dictionary.

        Args:
            xml_data: The XML document, as bytes or str
            query: The query that produced the document (echoed in metadata)

        Returns:
            Dictionary with "metadata" and "records" entries

        Raises:
            xml.etree.ElementTree.ParseError: If the document is not well-formed
            ValueError: If the document has no numberOfRecords element
        """
        namespaces = cls._NAMESPACES
        root = ET.fromstring(xml_data)

        count_element = root.find('.//srw:numberOfRecords', namespaces)
        if count_element is None:
            raise ValueError("SRU response does not contain numberOfRecords")

        record_elements = root.findall('.//srw:record', namespaces)
        results = {
            "metadata": {
                "query": query,
                "total_records": count_element.text,
                "records_returned": len(record_elements),
                "date_retrieved": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            },
            "records": []
        }

        for record in record_elements:
            # Records without a Dublin Core payload are skipped.
            record_data = record.find('.//srw:recordData/oai_dc:dc', namespaces)
            if record_data is not None:
                results['records'].append(cls._extract_record(record_data))

        return results

    @classmethod
    def _extract_record(cls, record_data) -> Dict[str, Any]:
        """Build the dictionary for one Dublin Core record element."""
        record_dict: Dict[str, Any] = {}

        for field in cls._DC_FIELDS:
            elements = record_data.findall(f'./dc:{field}', cls._NAMESPACES)
            values = [(elem.text or '').strip() for elem in elements]
            values = [value for value in values if value]
            if not values:
                # Blank values are never stored, whether the field occurs
                # once or several times.
                continue
            # Repeated elements are stored as a list, a single one as a string.
            record_dict[field] = values if len(elements) > 1 else values[0]

        # Extract Gallica URL from identifiers
        identifiers = record_dict.get('identifier', [])
        if isinstance(identifiers, str):
            identifiers = [identifiers]
        for identifier in identifiers:
            if 'gallica.bnf.fr/ark:' in identifier:
                record_dict['gallica_url'] = identifier
                break

        return record_dict
|
bnf_api/config.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration constants for the Gallica BnF API.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
# Pagination defaults applied when a caller does not specify them.
DEFAULT_START_RECORD = 1
DEFAULT_MAX_RECORDS = 10

# Endpoint of the Gallica SRU service.
BNF_SRU_URL = "https://gallica.bnf.fr/SRU"

# Gallica document type codes mapped to English display labels.
DOCUMENT_TYPES = dict(
    monographie="Books/Monographs",
    periodique="Periodicals/Newspapers",
    image="Images",
    manuscrit="Manuscripts",
    carte="Maps",
    musique="Music scores",
    objet="Objects",
    video="Videos",
    son="Audio recordings",
)
|
bnf_api/search.py
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Search utilities for the Gallica BnF API.
|
|
3
|
+
Provides functions to build different types of search queries.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Dict, Any, List, Optional
|
|
7
|
+
from .api import GallicaAPI
|
|
8
|
+
from .config import DEFAULT_MAX_RECORDS, DEFAULT_START_RECORD
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SearchAPI:
    """
    Search utilities for the Gallica BnF API.

    Each public method builds a CQL query string and delegates the request to
    the wrapped GallicaAPI instance, returning whatever that call returns.
    """

    def __init__(self, gallica_api: GallicaAPI):
        """
        Initialize the Search API.

        Args:
            gallica_api: An initialized GallicaAPI instance
        """
        self.gallica_api = gallica_api

    @staticmethod
    def _quote(term: str) -> str:
        """
        Return *term* as a double-quoted CQL string.

        Backslashes and double quotes are escaped so that user-supplied text
        cannot terminate the quoted term and alter the rest of the query.
        """
        escaped = str(term).replace('\\', '\\\\').replace('"', '\\"')
        return f'"{escaped}"'

    def _field_search(
        self,
        index: str,
        term: str,
        exact_match: bool,
        max_results: int,
        start_record: int
    ) -> Dict[str, Any]:
        """Run a single-index search; ``adj`` for phrases, ``all`` for all words."""
        # The term is always quoted: an unquoted multi-word term is not valid CQL.
        relation = 'adj' if exact_match else 'all'
        query = f'{index} {relation} {self._quote(term)}'
        return self.gallica_api.search(query, start_record, max_results)

    def search_by_title(
        self,
        title: str,
        exact_match: bool = False,
        max_results: int = DEFAULT_MAX_RECORDS,
        start_record: int = DEFAULT_START_RECORD
    ) -> Dict[str, Any]:
        """
        Search for documents in the Gallica digital library by title.

        Args:
            title: The title to search for
            exact_match: If True, search for the exact phrase; otherwise, search for titles containing all the words
            max_results: Maximum number of results to return (1-50)
            start_record: Starting record for pagination

        Returns:
            Dictionary containing search results and metadata
        """
        return self._field_search('dc.title', title, exact_match, max_results, start_record)

    def search_by_author(
        self,
        author: str,
        exact_match: bool = False,
        max_results: int = DEFAULT_MAX_RECORDS,
        start_record: int = DEFAULT_START_RECORD
    ) -> Dict[str, Any]:
        """
        Search for documents in the Gallica digital library by author.

        Args:
            author: The author name to search for
            exact_match: If True, search for the exact phrase; otherwise, search for authors containing all the words
            max_results: Maximum number of results to return (1-50)
            start_record: Starting record for pagination

        Returns:
            Dictionary containing search results and metadata
        """
        return self._field_search('dc.creator', author, exact_match, max_results, start_record)

    def search_by_subject(
        self,
        subject: str,
        exact_match: bool = False,
        max_results: int = DEFAULT_MAX_RECORDS,
        start_record: int = DEFAULT_START_RECORD
    ) -> Dict[str, Any]:
        """
        Search for documents in the Gallica digital library by subject.

        Args:
            subject: The subject to search for
            exact_match: If True, search for the exact phrase; otherwise, search for subjects containing all the words
            max_results: Maximum number of results to return (1-50)
            start_record: Starting record for pagination

        Returns:
            Dictionary containing search results and metadata
        """
        return self._field_search('dc.subject', subject, exact_match, max_results, start_record)

    def search_by_date(
        self,
        date: str,
        max_results: int = DEFAULT_MAX_RECORDS,
        start_record: int = DEFAULT_START_RECORD
    ) -> Dict[str, Any]:
        """
        Search for documents in the Gallica digital library by date.

        Args:
            date: The date to search for (format: YYYY or YYYY-MM or YYYY-MM-DD)
            max_results: Maximum number of results to return (1-50)
            start_record: Starting record for pagination

        Returns:
            Dictionary containing search results and metadata
        """
        return self._field_search('dc.date', date, False, max_results, start_record)

    def search_by_document_type(
        self,
        doc_type: str,
        max_results: int = DEFAULT_MAX_RECORDS,
        start_record: int = DEFAULT_START_RECORD
    ) -> Dict[str, Any]:
        """
        Search for documents in the Gallica digital library by document type.

        Args:
            doc_type: The document type to search for (e.g., monographie, periodique, image, manuscrit, carte, musique, etc.)
            max_results: Maximum number of results to return (1-50)
            start_record: Starting record for pagination

        Returns:
            Dictionary containing search results and metadata
        """
        return self._field_search('dc.type', doc_type, False, max_results, start_record)

    def advanced_search(
        self,
        query: str,
        max_results: int = DEFAULT_MAX_RECORDS,
        start_record: int = DEFAULT_START_RECORD
    ) -> Dict[str, Any]:
        """
        Perform an advanced search using custom CQL query syntax.

        The query is passed to the API unchanged, so the caller is responsible
        for quoting and escaping terms.
        Examples:
        - Search for books by Victor Hugo: dc.creator all "Victor Hugo" and dc.type all "monographie"
        - Search for maps about Paris: dc.subject all "Paris" and dc.type all "carte"
        - Search for documents in English: dc.language all "eng"

        Args:
            query: Custom CQL query string
            max_results: Maximum number of results to return (1-50)
            start_record: Starting record for pagination

        Returns:
            Dictionary containing search results and metadata
        """
        return self.gallica_api.search(query, start_record, max_results)

    def natural_language_search(
        self,
        query: str,
        max_results: int = DEFAULT_MAX_RECORDS,
        start_record: int = DEFAULT_START_RECORD
    ) -> Dict[str, Any]:
        """
        Search the Gallica digital library using natural language.

        This is a simplified search that uses the 'gallica all' operator to search across all fields.
        It's the most user-friendly way to search but may not be as precise as the other search methods.

        Args:
            query: Natural language search query
            max_results: Maximum number of results to return (1-50)
            start_record: Starting record for pagination

        Returns:
            Dictionary containing search results and metadata
        """
        return self._field_search('gallica', query, False, max_results, start_record)
|
|
@@ -0,0 +1,683 @@
|
|
|
1
|
+
"""
|
|
2
|
+
BnF Sequential Reporting Tool
|
|
3
|
+
----------------------------
|
|
4
|
+
This module provides a tool for generating structured reports based on research
|
|
5
|
+
from the Gallica BnF digital library. It uses a sequential approach to gather sources,
|
|
6
|
+
analyze them, and generate a comprehensive report with proper citations.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import sys
|
|
11
|
+
import logging
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from typing import Dict, List, Optional, Any, Union
|
|
14
|
+
from datetime import datetime
|
|
15
|
+
import textwrap
|
|
16
|
+
|
|
17
|
+
from .api import GallicaAPI
|
|
18
|
+
from .search import SearchAPI
|
|
19
|
+
|
|
20
|
+
# Set up logging
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
# Constants
|
|
24
|
+
DEFAULT_PAGE_COUNT = 4
|
|
25
|
+
DEFAULT_SOURCE_COUNT = 10
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class ReportSection:
    """
    Represents a section of the sequential report.
    """
    section_number: int  # Position of this section within the report
    total_sections: int  # Number of sections the report is expected to have
    content: str  # Body text of the section
    title: str  # Section heading
    is_bibliography: bool = False  # True when the section is the bibliography
    # Ids of the sources cited in this section. None is accepted for backward
    # compatibility and normalised to a fresh empty list in __post_init__.
    sources_used: Optional[List[int]] = None
    next_section_needed: bool = True  # False once the report is finished

    def __post_init__(self):
        """Replace a missing source list with a new, per-instance empty list."""
        if self.sources_used is None:
            self.sources_used = []
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class SequentialReportingServer:
|
|
43
|
+
"""
|
|
44
|
+
Server for generating sequential reports based on BnF research.
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
def __init__(self, gallica_api: GallicaAPI, search_api: SearchAPI):
    """
    Set up a reporting server with no topic, sources or sections yet.

    Args:
        gallica_api: An initialized GallicaAPI instance
        search_api: An initialized SearchAPI instance
    """
    # Collaborators used to query Gallica.
    self.gallica_api, self.search_api = gallica_api, search_api

    # Report configuration; defaults apply until a topic is supplied.
    self.topic = None
    self.page_count, self.source_count = DEFAULT_PAGE_COUNT, DEFAULT_SOURCE_COUNT

    # Material gathered and written so far.
    self.sources = []
    self.report_sections = []
    self.plan = None
    self._current_step = 0

    # Graphics are opt-in.
    self.include_graphics = False
    self.graphics = []
|
|
66
|
+
|
|
67
|
+
def validate_section_data(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Validate the input data for a section.

    Three kinds of input are recognised, checked in this order:

    1. Initialization: contains 'topic' (optionally 'page_count',
       'source_count', 'include_graphics').
    2. Source search request: contains a truthy 'search_sources'.
    3. Section data: must contain 'section_number' and 'total_sections'.

    Args:
        input_data: The input data for the section

    Returns:
        Validated input data. For section data, a dictionary with the keys
        'section_number', 'total_sections', 'title', 'content',
        'is_bibliography', 'sources_used' and 'next_section_needed'.

    Raises:
        ValueError: If a required section field is missing, a section number
            is not a positive whole number, or the content is not a string.
    """

    def _positive_count(value: Any) -> Any:
        """Return *value* as an int >= 1, or None when it is unusable."""
        try:
            count = int(value)
        except (ValueError, TypeError, OverflowError):
            return None
        return count if count >= 1 else None

    def _section_int(value: Any, label: str) -> int:
        """Return *value* as an int >= 1 or raise ValueError naming *label*."""
        message = f"Invalid {label}: must be a number"
        if isinstance(value, str) and value.strip().isdigit():
            try:
                value = int(value.strip())
            except ValueError:
                # isdigit() accepts characters such as superscripts that int() rejects.
                raise ValueError(message) from None
        # bool is a subclass of int; True/False are not section numbers.
        if isinstance(value, bool) or not isinstance(value, int):
            raise ValueError(message)
        if value < 1:
            # Zero or negative totals would later cause a division by zero
            # when progress is computed.
            raise ValueError(f"Invalid {label}: must be at least 1")
        return value

    validated_data = {}

    # Handle initialization with topic
    if 'topic' in input_data:
        validated_data['topic'] = str(input_data['topic'])
        if 'page_count' in input_data:
            count = _positive_count(input_data['page_count'])
            validated_data['page_count'] = DEFAULT_PAGE_COUNT if count is None else count
        if 'source_count' in input_data:
            count = _positive_count(input_data['source_count'])
            validated_data['source_count'] = DEFAULT_SOURCE_COUNT if count is None else count
        if 'include_graphics' in input_data:
            validated_data['include_graphics'] = bool(input_data['include_graphics'])
        return validated_data

    # Handle search_sources flag
    if input_data.get('search_sources'):
        validated_data['search_sources'] = True
        return validated_data

    # Check if required fields are present for section data
    for field in ('section_number', 'total_sections'):
        if field not in input_data:
            raise ValueError(f"Missing required field: {field}")

    section_number = _section_int(input_data['section_number'], 'sectionNumber')
    total_sections = _section_int(input_data['total_sections'], 'totalSections')

    # Title defaults to "Section <n>" when not provided
    title = input_data.get('title', f"Section {section_number}")

    # Content defaults to an empty string; anything else must be a string
    content = input_data.get('content', '')
    if content is None:
        content = ''
    if not isinstance(content, str):
        raise ValueError("Invalid content: must be a string")

    # Sources default to an empty list
    sources_used = input_data.get('sources_used', [])
    if sources_used is None:
        sources_used = []

    return {
        'section_number': section_number,
        'total_sections': total_sections,
        'title': title,
        'content': content,
        'is_bibliography': input_data.get('is_bibliography', False),
        'sources_used': sources_used,
        'next_section_needed': input_data.get('next_section_needed', True)
    }
|
|
151
|
+
|
|
152
|
+
def search_sources(self, topic: str, source_count: int = DEFAULT_SOURCE_COUNT) -> List[Dict[str, Any]]:
    """
    Search for sources on the given topic.

    A natural language search is tried first. If it yields fewer than
    ``source_count`` records, a subject search is used to top up the list.
    Records that share a Gallica URL or identifier are kept only once.

    Args:
        topic: The topic to search for
        source_count: The number of sources to retrieve

    Returns:
        List of sources as dictionaries (keys: id, title, creator, date, type,
        language, url, citation, thumbnail). An empty list is returned if the
        search fails; the error is logged rather than raised.
    """

    def _record_key(record: Dict[str, Any]) -> Any:
        """Return a hashable identity for *record*, or None if it has none."""
        key = record.get('gallica_url')
        if not key and record.get('identifier'):
            # identifier may be a list, so use its repr as a hashable key
            key = repr(record['identifier'])
        return key or None

    try:
        # Try natural language search first
        results = self.search_api.natural_language_search(topic, max_results=source_count)
        candidates = list(results.get('records', []))

        # If not enough results, try subject search
        if len(candidates) < source_count:
            # Ask for a full page: some subject hits may duplicate earlier ones.
            subject_results = self.search_api.search_by_subject(topic, max_results=source_count)
            candidates.extend(subject_results.get('records', []))

        # Drop duplicates while preserving the original ranking
        seen = set()
        records = []
        for record in candidates:
            key = _record_key(record)
            if key is not None:
                if key in seen:
                    continue
                seen.add(key)
            records.append(record)

        # Format the results
        sources = []
        for i, result in enumerate(records[:source_count], 1):
            sources.append({
                'id': i,
                'title': result.get('title', 'Unknown Title'),
                'creator': result.get('creator', 'Unknown Author'),
                'date': result.get('date', 'Unknown Date'),
                'type': result.get('type', 'Unknown Type'),
                'language': result.get('language', 'Unknown Language'),
                # GallicaAPI.search stores the document link under 'gallica_url';
                # 'url' is kept as a fallback for records from other producers.
                'url': result.get('gallica_url') or result.get('url', ''),
                'citation': self._format_citation(result),
                'thumbnail': result.get('thumbnail', '')
            })

        return sources
    except Exception:
        # Best effort: report through the module logger, not stdout.
        logger.exception("Error searching for sources")
        return []
|
|
194
|
+
|
|
195
|
+
def search_graphics(self, topic: str, count: int = 5) -> List[Dict[str, Any]]:
    """
    Search for graphics (images, maps) related to the topic.

    Images and maps are each searched using the first word of the topic, then
    the full topic if nothing was found. If both searches are empty, a broader
    query over image, map and print types is tried. When no graphic is found
    at all, two generic placeholder entries pointing at the Gallica home page
    are returned instead.

    Args:
        topic: The topic to search for
        count: The number of graphics to retrieve

    Returns:
        List of at most ``count`` graphics as dictionaries (keys: id, title,
        description, type, url, thumbnail). An empty list is returned if the
        search fails; the error is logged rather than raised.
    """

    def _quoted(term: str) -> str:
        """Return *term* as a double-quoted CQL string with quotes escaped."""
        return '"' + term.replace('\\', '\\\\').replace('"', '\\"') + '"'

    def _search_type(doc_type: str) -> Dict[str, Any]:
        """Search one document type by main keyword, then by full topic."""
        query = f'gallica all {_quoted(main_keyword)} and dc.type all "{doc_type}"'
        found = self.search_api.advanced_search(query, max_results=count)
        # Retrying is pointless when the full topic is the keyword itself:
        # the query would be identical.
        if not found.get('records', []) and topic != main_keyword:
            query = f'gallica all {_quoted(topic)} and dc.type all "{doc_type}"'
            found = self.search_api.advanced_search(query, max_results=count)
        return found

    def _to_graphic(result: Dict[str, Any], graphic_id: int, kind: str, label: str) -> Dict[str, Any]:
        """Convert one search record into a graphic entry."""
        url = result.get('gallica_url', '')
        thumbnail = ''
        if url:
            # Remove /thumbnail suffix if it exists
            url = url.replace('/thumbnail', '')
            ark_id = url.split('ark:')[1] if 'ark:' in url else ''
            if ark_id:
                thumbnail = f"https://gallica.bnf.fr/ark:{ark_id}/thumbnail"
        title = result.get('title', f'Untitled {label}')
        return {
            'id': graphic_id,
            'title': title,
            'description': f"{label} related to {topic}: {title}",
            'type': kind,
            'url': url,
            'thumbnail': thumbnail
        }

    try:
        # Break down the topic into keywords for better search results
        keywords = topic.split()
        main_keyword = keywords[0] if keywords else topic

        image_results = _search_type('image')
        map_results = _search_type('carte')

        # If still no results, try a more general search for any visual material
        if not image_results.get('records', []) and not map_results.get('records', []):
            general_query = (
                f'gallica all {_quoted(main_keyword)} and '
                '(dc.type all "image" or dc.type all "carte" or dc.type all "estampe")'
            )
            image_results = self.search_api.advanced_search(general_query, max_results=count)

        # Combine and format results; ids run continuously across both kinds
        graphics = []
        for result in image_results.get('records', []):
            graphics.append(_to_graphic(result, len(graphics) + 1, 'image', 'Image'))
        for result in map_results.get('records', []):
            graphics.append(_to_graphic(result, len(graphics) + 1, 'map', 'Map'))

        # If we still have no graphics, create some placeholder graphics with generic URLs
        if not graphics:
            graphics = [
                {
                    'id': 1,
                    'title': f"Illustration related to {topic}",
                    'description': f"Illustration related to {topic}",
                    'type': 'image',
                    'url': 'https://gallica.bnf.fr/',
                    'thumbnail': 'https://gallica.bnf.fr/themes/gallica2015/images/logo-gallica.png'
                },
                {
                    'id': 2,
                    'title': f"Map related to {topic}",
                    'description': f"Map related to {topic}",
                    'type': 'map',
                    'url': 'https://gallica.bnf.fr/',
                    'thumbnail': 'https://gallica.bnf.fr/themes/gallica2015/images/logo-gallica.png'
                }
            ]

        return graphics[:count]
    except Exception:
        # Best effort: report through the module logger, not stdout.
        logger.exception("Error searching for graphics")
        return []
|
|
308
|
+
|
|
309
|
+
def process_section(self, input_data: Any) -> Dict[str, Any]:
    """
    Process a report section following a sequential approach.

    Depending on the validated input this either (1) initializes a new report
    for a topic and creates its plan, (2) searches for sources (and graphics
    when enabled), or (3) records a written section and reports progress.

    Args:
        input_data: The input data for the section

    Returns:
        Response data as a dictionary whose 'content' list holds one item with
        a 'text' entry (a JSON document, or a plain error message when sources
        are requested before a topic is set). If processing raises, the
        response describes the error and carries 'isError': True.
    """
    try:
        # Validate once; the result is reused for every branch below.
        data = self.validate_section_data(input_data)

        # Initialize with topic
        if 'topic' in data:
            self.topic = data['topic']
            self.page_count = data.get('page_count', DEFAULT_PAGE_COUNT)
            self.source_count = data.get('source_count', DEFAULT_SOURCE_COUNT)
            self.include_graphics = data.get('include_graphics', False)
            self.sources = []
            self.graphics = []
            self.report_sections = []
            self._current_step = 0

            # Create a plan for the report
            self.plan = self.create_plan(self.topic, self.page_count)

            return {
                'content': [{
                    'text': json.dumps({
                        'topic': self.topic,
                        'pageCount': self.page_count,
                        'sourceCount': self.source_count,
                        'includeGraphics': self.include_graphics,
                        'plan': self.plan,
                        'nextStep': 'Search for sources using natural_language_search or search_by_subject'
                    })
                }]
            }

        # Search for sources
        if data.get('search_sources', False):
            if not self.topic:
                return {'content': [{'text': 'Error: No topic specified. Please initialize with a topic first.'}]}

            self.sources = self.search_sources(self.topic, self.source_count)

            # If graphics are requested, search for them
            if self.include_graphics:
                self.graphics = self.search_graphics(self.topic, count=5)

            self._current_step = 1

            return {
                'content': [{
                    'text': json.dumps({
                        'sources': self.sources,
                        'graphics': self.graphics if self.include_graphics else [],
                        'nextStep': 'Create bibliography section'
                    })
                }]
            }

        # Anything else is section data for a bibliography or content section
        section = data

        # Adjust total sections if needed
        if section['section_number'] > section['total_sections']:
            section['total_sections'] = section['section_number']

        # Add section to report
        self.report_sections.append(section)

        # Format and display section (stderr keeps stdout free for responses)
        print(self.format_section(section), file=sys.stderr)

        # Update current step in plan
        if self.plan:
            self.plan["current_section"] = section['section_number']
            if section['section_number'] < len(self.plan["sections"]):
                next_section_title = self.plan["sections"][section['section_number']]["title"]
                next_step = f"Create section {section['section_number'] + 1}: {next_section_title}"
            else:
                next_step = "Report complete"
        else:
            next_step = "Continue writing the report"
            if not section['next_section_needed']:
                next_step = "Report complete"

        # Calculate progress. Guard against a zero total and cap at 100%,
        # since re-submitted sections make the list longer than the total.
        total_sections = section['total_sections']
        if total_sections > 0:
            progress = min(100.0, (len(self.report_sections) / total_sections) * 100)
        else:
            progress = 0.0

        return {
            'content': [{
                'text': json.dumps({
                    'sectionNumber': section['section_number'],
                    'totalSections': section['total_sections'],
                    'nextSectionNeeded': section['next_section_needed'],
                    'progress': f"{progress:.1f}%",
                    'reportSectionsCount': len(self.report_sections),
                    'nextStep': next_step,
                    'sources': self.sources if section['is_bibliography'] else None
                })
            }]
        }

    except Exception as error:
        logger.exception("Error processing report section: %s", error)
        return {
            'content': [{
                'text': json.dumps({
                    'error': str(error),
                    'status': 'failed'
                })
            }],
            'isError': True
        }
|
|
428
|
+
|
|
429
|
+
def format_section(self, section: Dict[str, Any]) -> str:
    """
    Format a report section as a boxed block of text for display.

    The box is 80 columns wide. Padding is computed from the number of
    terminal columns a string occupies, not from ``len()``: the header
    contains ANSI colour escapes (zero columns) and an emoji (two columns),
    so counting code points would misalign the right-hand border.

    Args:
        section: The report section to format. Reads 'section_number',
            'total_sections', 'title', 'content' and 'is_bibliography';
            missing keys fall back to defaults.

    Returns:
        Formatted section as a string (rows joined by newlines, no
        trailing newline)
    """
    # Imported locally so the method does not depend on module-level
    # imports beyond the ones it already used (textwrap, typing).
    import re
    import unicodedata

    width = 80
    ansi_codes = re.compile(r"\x1b\[[0-9;]*m")

    def display_width(text: str) -> int:
        """Return the number of terminal columns occupied by text."""
        columns = 0
        for char in ansi_codes.sub("", text):
            if unicodedata.combining(char):
                # Combining marks are drawn on top of the previous character.
                continue
            wide = unicodedata.east_asian_width(char) in ("W", "F")
            columns += 2 if wide else 1
        return columns

    def padded(text: str, available: int) -> str:
        """Right-pad text with spaces up to the available column count."""
        return text + " " * max(0, available - display_width(text))

    def body_row(text: str) -> str:
        """Wrap one line of text in the box's left and right borders."""
        return "│ " + padded(text, width - 4) + " │"

    # Get section information
    section_number = section.get('section_number', 0)
    total_sections = section.get('total_sections', 0)
    title = section.get('title', 'Untitled')
    is_bibliography = section.get('is_bibliography', False)

    # Yellow icon for the bibliography, blue icon for content sections
    icon = "\033[93m📚\033[0m" if is_bibliography else "\033[94m📄\033[0m"
    header = f" {icon} Section{section_number}/{total_sections}: {title} "

    horizontal = "─" * (width - 2)
    rows = [
        "┌" + horizontal + "┐",
        "│" + padded(header, width - 2) + "│",
        "├" + horizontal + "┤",
    ]

    # Add content, wrapped to fit inside the box
    content = section.get('content', '')
    if content:
        rows.extend(body_row(line) for line in textwrap.wrap(content, width=width - 4))

    # Add graphics if available and this is not a bibliography
    if not is_bibliography and self.include_graphics and self.graphics:
        # Simple relevance check: any word of the section title appears in
        # the graphic's title. Graphics without a title never match.
        terms = str(title).lower().split()
        section_graphics = [
            graphic for graphic in self.graphics
            if any(term in str(graphic.get('title', '')).lower() for term in terms)
        ]

        # Add up to 2 graphics for this section
        if section_graphics:
            rows.append(body_row(""))
            rows.append(body_row("Graphics:"))
            for graphic in section_graphics[:2]:
                desc = f"- {graphic.get('description', '')}"
                rows.extend(body_row(line) for line in textwrap.wrap(desc, width=width - 4))
                rows.append(body_row(f" URL: {graphic.get('url', '')}"))

    rows.append("└" + horizontal + "┘")
    return "\n".join(rows)
|
|
486
|
+
|
|
487
|
+
def create_plan(self, topic: str, page_count: int = DEFAULT_PAGE_COUNT) -> Dict[str, Any]:
    """
    Create a sequential plan for the report based on the topic.

    The plan starts with the bibliography, then the introduction, then a
    set of standard content sections that grows with ``page_count``, then
    generic "Additional Analysis" fillers, and always ends with a
    conclusion. The target size is ``page_count * 2 + 1`` sections, capped
    at 20; very small page counts still yield at least bibliography,
    introduction and conclusion.

    Args:
        topic: The research topic
        page_count: Number of pages to generate

    Returns:
        A plan dictionary with the topic, the ordered section list, its
        length as 'total_sections', and the fixed list of workflow steps
    """
    # Calculate number of sections based on page count
    # (1 page ≈ 2 sections + bibliography), capped at 20 sections
    total_sections = min(page_count * 2 + 1, 20)

    # The bibliography is written first, followed by the introduction
    sections = [
        {"title": "Bibliography", "is_bibliography": True},
        {"title": "Introduction", "is_bibliography": False},
    ]

    # Add standard content sections based on page count
    if page_count >= 2:
        sections.append({"title": "Historical Context", "is_bibliography": False})

    if page_count >= 3:
        sections.append({"title": "Main Analysis", "is_bibliography": False})
        sections.append({"title": "Key Findings", "is_bibliography": False})

    if page_count >= 4:
        sections.append({"title": "Detailed Examination", "is_bibliography": False})
        sections.append({"title": "Critical Perspectives", "is_bibliography": False})

    # Fill up to the target, keeping one slot free for the conclusion that
    # is appended below. Without the "- 1" the plan overshoots the target
    # (and the 20-section cap) by one. A negative count yields no fillers.
    remaining_sections = total_sections - len(sections) - 1
    for i in range(remaining_sections):
        sections.append({"title": f"Additional Analysis {i+1}", "is_bibliography": False})

    # Always end with conclusion
    sections.append({"title": "Conclusion", "is_bibliography": False})

    return {
        "topic": topic,
        "total_sections": len(sections),
        "sections": sections,
        "current_section": 0,
        "steps": [
            "Initialize with topic",
            "Search for sources",
            "Create bibliography",
            "Write introduction",
            "Develop content sections",
            "Write conclusion"
        ],
        "current_step": 0,
        "next_step": "Search for sources"
    }
|
|
543
|
+
|
|
544
|
+
def _format_citation(self, record: Dict[str, Any]) -> str:
    """
    Format a record as a citation.

    Field values may be plain strings or lists of strings (the original
    code already handled a list-valued 'type'); lists are joined so the
    citation never contains a Python list representation. Missing, None
    or empty fields fall back to a placeholder.

    Args:
        record: The record to format. Reads 'creator', 'title',
            'publisher', 'date', 'type', and 'gallica_url' (falling back
            to 'identifier') for the URL.

    Returns:
        Formatted citation as a string. Periodicals/articles omit the
        publisher; every other type includes it.
    """
    def field_text(key: str, separator: str = "; ") -> str:
        """Return the field as text, or '' when it is missing or empty."""
        value = record.get(key)
        if isinstance(value, (list, tuple)):
            return separator.join(str(item) for item in value if item)
        if value is None:
            return ""
        return str(value)

    creator = field_text('creator') or 'Unknown Author'
    title = field_text('title') or 'Unknown Title'
    publisher = field_text('publisher') or 'Unknown Publisher'
    date = field_text('date') or 'n.d.'
    url = field_text('gallica_url') or field_text('identifier') or 'No URL available'

    # Normalise the type to one lowercase string so the substring checks
    # below work for strings, lists and a None/missing value alike.
    doc_type = field_text('type', separator=' ').lower()

    is_book = 'monographie' in doc_type or 'book' in doc_type
    is_periodical = 'periodique' in doc_type or 'article' in doc_type

    # Book markers take precedence over periodical markers, as before.
    if is_periodical and not is_book:
        return f"{creator}. ({date}). {title}. Retrieved from {url}"
    return f"{creator}. ({date}). {title}. {publisher}. Retrieved from {url}"
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
# Tool definition
|
|
576
|
+
# Descriptor for the sequential reporting tool: a name, a long usage
# description, and a JSON-Schema style description of the accepted arguments.
# NOTE(review): presumably registered with the MCP server elsewhere - confirm.
BNF_SEQUENTIAL_REPORTING_TOOL = {
    "name": "bnf_sequential_reporting",
    "description": """A tool for generating comprehensive research reports using the Gallica BnF digital library.
This tool helps create well-structured, properly cited reports on any topic by breaking the process into sequential steps.

When to use this tool:
- Creating research reports on historical, literary, or cultural topics
- Generating academic papers with proper citations
- Compiling information from multiple Gallica sources into a cohesive document
- Producing educational materials based on primary and secondary sources

Key features:
- Automatically searches for relevant sources in the Gallica digital library
- Creates properly formatted citations in a bibliography
- Generates reports with a specified number of pages (default: 4)
- Supports sequential writing of report sections
- Includes in-text citations in the format [1], [2], etc.
- Maintains context across multiple sections

How it works:
1. First, provide a topic and optional configuration parameters
2. The tool searches for relevant sources in the Gallica digital library
3. Start by creating the bibliography as the first section
4. Then write each section of the report sequentially
5. Include in-text citations to reference sources from the bibliography
6. Continue until the report is complete

Parameters explained:
- topic: The research topic for the report (only needed for initialization)
- pageCount: Number of pages to generate (default: 4)
- sourceCount: Number of sources to find (default: 10)
- sectionNumber: Current section number in sequence
- totalSections: Total number of sections in the report
- title: Title of the current section
- content: The content of the current section
- isBibliography: Whether this section is the bibliography
- sourcesUsed: List of source IDs used in this section
- nextSectionNeeded: Whether another section is needed
- includeGraphics: Whether to include graphics in the report (default: False)

You should:
1. Start by providing a topic to initialize the research
2. Create the bibliography first as section 1
3. Write each section sequentially, including in-text citations [1], [2], etc.
4. Ensure each section builds on previous ones to create a cohesive report
5. Include a conclusion in the final section
6. Set nextSectionNeeded to false when the report is complete""",
    "inputSchema": {
        "type": "object",
        "properties": {
            # Initialization / configuration arguments
            "topic": {
                "type": "string",
                "description": "Research topic for the report (only needed for initialization)",
            },
            "pageCount": {
                "type": "integer",
                "description": "Number of pages to generate",
                "minimum": 1,
                "default": 4,
            },
            "sourceCount": {
                "type": "integer",
                "description": "Number of sources to find",
                "minimum": 1,
                "default": 10,
            },
            # Position of the current section within the report
            "sectionNumber": {"type": "integer", "description": "Current section number", "minimum": 1},
            "totalSections": {"type": "integer", "description": "Total sections in the report", "minimum": 1},
            # Payload of the current section
            "title": {"type": "string", "description": "Title of the current section"},
            "content": {"type": "string", "description": "Content of the current section"},
            "isBibliography": {"type": "boolean", "description": "Whether this section is the bibliography"},
            "sourcesUsed": {
                "type": "array",
                "items": {"type": "integer"},
                "description": "List of source IDs used in this section",
            },
            # Flow control and presentation options
            "nextSectionNeeded": {"type": "boolean", "description": "Whether another section is needed"},
            "includeGraphics": {
                "type": "boolean",
                "description": "Whether to include graphics in the report",
                "default": False,
            },
        },
        "required": ["sectionNumber", "totalSections", "title", "content", "nextSectionNeeded"],
    },
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: iflow-mcp_mcp-bibliotheque_nationale_de_france
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MCP server for accessing the Gallica digital library of the Bibliothèque nationale de France (BnF)
|
|
5
|
+
Author-email: iflow-mcp <iflow-mcp@example.com>
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
|
+
Requires-Dist: requests==2.31.0
|
|
8
|
+
Requires-Dist: fastmcp==0.1.0
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
bnf_api/__init__.py,sha256=MrswwsMEz5SLsGbkO1GFjuDpqdCAM9uEYMz3pG0IYyE,514
|
|
2
|
+
bnf_api/api.py,sha256=bmkVldzyRwbpK33cCcr5YiO1h7LqXsgYAqcsgz291Sg,5723
|
|
3
|
+
bnf_api/config.py,sha256=fTSoqpc1jeBEESMwslA90NEvPZUbg5XE7vBKwUSTqZU,511
|
|
4
|
+
bnf_api/search.py,sha256=MpLB9DLVPdZAVi7QBI5ZgO8OAqqPwsLO07W576SngJ8,6867
|
|
5
|
+
bnf_api/sequential_reporting.py,sha256=SUW_E2Binf4nN91awENCjq-GT5qXkhGNqZzsXkDyjoI,28580
|
|
6
|
+
iflow_mcp_mcp_bibliotheque_nationale_de_france-0.1.0.dist-info/METADATA,sha256=1nI6ObgBiNSVDlPw3VBi4CM1Q7P42qwpxOQNrR5BK0A,332
|
|
7
|
+
iflow_mcp_mcp_bibliotheque_nationale_de_france-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
8
|
+
iflow_mcp_mcp_bibliotheque_nationale_de_france-0.1.0.dist-info/entry_points.txt,sha256=ns5aJirazUxaubRuzmOZafplNnqPSd2B2ARIF-7sOXI,73
|
|
9
|
+
iflow_mcp_mcp_bibliotheque_nationale_de_france-0.1.0.dist-info/top_level.txt,sha256=BeNw1ib6gAYJBqMhoWYY_1aAVxOgVQUwteI5oLAcelY,8
|
|
10
|
+
iflow_mcp_mcp_bibliotheque_nationale_de_france-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
bnf_api
|