iflow-mcp_mcp-bibliotheque_nationale_de_france 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bnf_api/__init__.py ADDED
@@ -0,0 +1,19 @@
1
+ """
2
+ Gallica BnF API Package
3
+ ----------------------
4
+ This package provides tools to search and retrieve information from the Gallica digital library
5
+ of the Bibliothèque nationale de France (BnF) using their SRU API.
6
+ """
7
+
8
+ from .api import GallicaAPI
9
+ from .search import SearchAPI
10
+ from .config import DEFAULT_MAX_RECORDS, DEFAULT_START_RECORD, BNF_SRU_URL, DOCUMENT_TYPES
11
+
12
+ __all__ = [
13
+ 'GallicaAPI',
14
+ 'SearchAPI',
15
+ 'DEFAULT_MAX_RECORDS',
16
+ 'DEFAULT_START_RECORD',
17
+ 'BNF_SRU_URL',
18
+ 'DOCUMENT_TYPES'
19
+ ]
bnf_api/api.py ADDED
@@ -0,0 +1,148 @@
1
+ """
2
+ Gallica BnF API Client
3
+ ---------------------
4
+ Client for the Gallica BnF SRU API.
5
+ Provides methods to search for documents and retrieve metadata.
6
+ """
7
+
8
+ import logging
9
+ import requests
10
+ import xml.etree.ElementTree as ET
11
+ from datetime import datetime
12
+ from typing import Dict, Any, List, Optional
13
+
14
# Set up logging
logger = logging.getLogger(__name__)

# Constants
# NOTE(review): these mirror the values declared in bnf_api/config.py; keep
# the two copies in sync (or import them from there).
DEFAULT_MAX_RECORDS = 10
DEFAULT_START_RECORD = 1
BNF_SRU_URL = "https://gallica.bnf.fr/SRU"


class GallicaAPI:
    """
    Client for the Gallica BnF SRU API.
    Provides methods to search for documents and retrieve metadata.
    """

    # Seconds to wait for the SRU endpoint; without a timeout a stalled
    # connection would block the caller forever.
    timeout: float = 30.0

    # XML namespaces used in SRU responses
    _NAMESPACES = {
        'srw': 'http://www.loc.gov/zing/srw/',
        'dc': 'http://purl.org/dc/elements/1.1/',
        'oai_dc': 'http://www.openarchives.org/OAI/2.0/oai_dc/'
    }

    # Location of the human-readable message inside an SRU diagnostic
    _DIAGNOSTIC_MESSAGE = './/{http://www.loc.gov/zing/srw/diagnostic/}message'

    # Dublin Core fields copied from each record
    _DC_FIELDS = (
        'title', 'creator', 'contributor', 'publisher', 'date',
        'description', 'type', 'format', 'identifier', 'source',
        'language', 'relation', 'coverage', 'rights', 'subject'
    )

    def __init__(self, timeout: float = 30.0):
        """
        Initialize the Gallica API client.

        Args:
            timeout: Seconds to wait for each HTTP request before giving up
        """
        self.base_url = BNF_SRU_URL
        self.timeout = timeout
        logger.info("Gallica API client initialized")

    def search(self,
               query: str,
               start_record: int = DEFAULT_START_RECORD,
               max_records: int = DEFAULT_MAX_RECORDS) -> Dict[str, Any]:
        """
        Search for documents in the Gallica digital library.

        Args:
            query: Search query in CQL format
            start_record: Starting record number for pagination
            max_records: Maximum number of records to return

        Returns:
            On success, a dictionary with a "metadata" entry (query,
            total_records, records_returned, date_retrieved) and a "records"
            list. On failure, a dictionary with an "error" message and the
            "query"; errors are reported this way rather than raised.
        """
        params = {
            'version': '1.2',
            'operation': 'searchRetrieve',
            'query': query,
            'startRecord': start_record,
            'maximumRecords': max_records
        }

        try:
            response = requests.get(self.base_url, params=params, timeout=self.timeout)
            response.raise_for_status()
            # Parse the raw bytes so the XML parser applies the encoding the
            # document declares instead of the HTTP layer's guess.
            return self._parse_response(response.content, query)

        except requests.exceptions.RequestException as e:
            logger.error("Error during Gallica API request: %s", e)
            return {
                "error": str(e),
                "query": query,
                "parameters": params
            }
        except ET.ParseError as e:
            logger.error("Error parsing XML response: %s", e)
            return {
                "error": f"XML parsing error: {str(e)}",
                "query": query
            }
        except Exception as e:
            logger.error("Unexpected error: %s", e)
            return {
                "error": str(e),
                "query": query
            }

    @classmethod
    def _parse_response(cls, payload, query: str) -> Dict[str, Any]:
        """Turn an SRU searchRetrieve XML payload (bytes or str) into the result dictionary."""
        root = ET.fromstring(payload)
        namespaces = cls._NAMESPACES

        count_element = root.find('.//srw:numberOfRecords', namespaces)
        if count_element is None or not (count_element.text or '').strip():
            # No record count: report the server's diagnostic message when one
            # is present instead of failing on a missing element.
            diagnostic = root.find(cls._DIAGNOSTIC_MESSAGE)
            detail = (diagnostic.text or '').strip() if diagnostic is not None else ''
            message = "SRU response did not contain numberOfRecords"
            if detail:
                message = f"{message}: {detail}"
            logger.error("Unexpected SRU response: %s", message)
            return {
                "error": message,
                "query": query
            }

        record_elements = root.findall('.//srw:record', namespaces)
        results = {
            "metadata": {
                "query": query,
                # Kept as the string the server sent, as callers received before
                "total_records": count_element.text.strip(),
                "records_returned": len(record_elements),
                "date_retrieved": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            },
            "records": []
        }

        for record in record_elements:
            # Records without a Dublin Core payload are counted but not listed
            record_data = record.find('.//srw:recordData/oai_dc:dc', namespaces)
            if record_data is not None:
                results['records'].append(cls._extract_record(record_data))

        return results

    @classmethod
    def _extract_record(cls, record_data) -> Dict[str, Any]:
        """Copy the Dublin Core fields of one record element into a dictionary."""
        record_dict: Dict[str, Any] = {}

        for field in cls._DC_FIELDS:
            values = [
                element.text.strip()
                for element in record_data.findall(f'./dc:{field}', cls._NAMESPACES)
                if element.text and element.text.strip()
            ]
            # No usable value: omit the field. One value: plain string.
            # Several values: list of strings.
            if len(values) == 1:
                record_dict[field] = values[0]
            elif values:
                record_dict[field] = values

        # Expose the first identifier that points at Gallica as 'gallica_url'
        identifiers = record_dict.get('identifier', [])
        if isinstance(identifiers, str):
            identifiers = [identifiers]
        for identifier in identifiers:
            if 'gallica.bnf.fr/ark:' in identifier:
                record_dict['gallica_url'] = identifier
                break

        return record_dict
bnf_api/config.py ADDED
@@ -0,0 +1,23 @@
1
+ """
2
+ Configuration constants for the Gallica BnF API.
3
+ """
4
+
5
+ # Default paging parameters: how many records to request and which record to start from when the caller gives neither
6
+ DEFAULT_MAX_RECORDS = 10
7
+ DEFAULT_START_RECORD = 1
8
+
9
+ # Endpoint of the Gallica SRU search service
10
+ BNF_SRU_URL = "https://gallica.bnf.fr/SRU"
11
+
12
+ # Gallica document type codes (the values searched through the dc.type index) mapped to English display labels
13
+ DOCUMENT_TYPES = {
14
+ "monographie": "Books/Monographs",
15
+ "periodique": "Periodicals/Newspapers",
16
+ "image": "Images",
17
+ "manuscrit": "Manuscripts",
18
+ "carte": "Maps",
19
+ "musique": "Music scores",
20
+ "objet": "Objects",
21
+ "video": "Videos",
22
+ "son": "Audio recordings"
23
+ }
bnf_api/search.py ADDED
@@ -0,0 +1,193 @@
1
+ """
2
+ Search utilities for the Gallica BnF API.
3
+ Provides functions to build different types of search queries.
4
+ """
5
+
6
+ from typing import Dict, Any, List, Optional
7
+ from .api import GallicaAPI
8
+ from .config import DEFAULT_MAX_RECORDS, DEFAULT_START_RECORD
9
+
10
+
11
class SearchAPI:
    """
    Search utilities for the Gallica BnF API.

    Each method builds a CQL query string and delegates the request to the
    wrapped GallicaAPI instance, returning whatever its search() returns.
    """

    def __init__(self, gallica_api: GallicaAPI):
        """
        Initialize the Search API.

        Args:
            gallica_api: An initialized GallicaAPI instance
        """
        self.gallica_api = gallica_api

    @staticmethod
    def _quote(term: str) -> str:
        """Return the term as a double-quoted CQL string with backslashes and quotes escaped."""
        escaped = str(term).replace('\\', '\\\\').replace('"', '\\"')
        return f'"{escaped}"'

    def _field_query(self, index: str, term: str, exact_match: bool = False) -> str:
        """Build '<index> <relation> "<term>"' for a single index."""
        # 'adj' matches the words as an adjacent phrase; 'all' only requires
        # every word to be present somewhere in the field.
        relation = 'adj' if exact_match else 'all'
        return f'{index} {relation} {self._quote(term)}'

    def search_by_title(
        self,
        title: str,
        exact_match: bool = False,
        max_results: int = DEFAULT_MAX_RECORDS,
        start_record: int = DEFAULT_START_RECORD
    ) -> Dict[str, Any]:
        """
        Search for documents in the Gallica digital library by title.

        Args:
            title: The title to search for
            exact_match: If True, search for the exact title; otherwise, search for title containing the words
            max_results: Maximum number of results to return (1-50)
            start_record: Starting record for pagination

        Returns:
            Dictionary containing search results and metadata
        """
        query = self._field_query('dc.title', title, exact_match)
        return self.gallica_api.search(query, start_record, max_results)

    def search_by_author(
        self,
        author: str,
        exact_match: bool = False,
        max_results: int = DEFAULT_MAX_RECORDS,
        start_record: int = DEFAULT_START_RECORD
    ) -> Dict[str, Any]:
        """
        Search for documents in the Gallica digital library by author.

        Args:
            author: The author name to search for
            exact_match: If True, search for the exact author name; otherwise, search for author containing the words
            max_results: Maximum number of results to return (1-50)
            start_record: Starting record for pagination

        Returns:
            Dictionary containing search results and metadata
        """
        query = self._field_query('dc.creator', author, exact_match)
        return self.gallica_api.search(query, start_record, max_results)

    def search_by_subject(
        self,
        subject: str,
        exact_match: bool = False,
        max_results: int = DEFAULT_MAX_RECORDS,
        start_record: int = DEFAULT_START_RECORD
    ) -> Dict[str, Any]:
        """
        Search for documents in the Gallica digital library by subject.

        Args:
            subject: The subject to search for
            exact_match: If True, search for the exact subject; otherwise, search for subject containing the words
            max_results: Maximum number of results to return (1-50)
            start_record: Starting record for pagination

        Returns:
            Dictionary containing search results and metadata
        """
        query = self._field_query('dc.subject', subject, exact_match)
        return self.gallica_api.search(query, start_record, max_results)

    def search_by_date(
        self,
        date: str,
        max_results: int = DEFAULT_MAX_RECORDS,
        start_record: int = DEFAULT_START_RECORD
    ) -> Dict[str, Any]:
        """
        Search for documents in the Gallica digital library by date.

        Args:
            date: The date to search for (format: YYYY or YYYY-MM or YYYY-MM-DD)
            max_results: Maximum number of results to return (1-50)
            start_record: Starting record for pagination

        Returns:
            Dictionary containing search results and metadata
        """
        query = self._field_query('dc.date', date)
        return self.gallica_api.search(query, start_record, max_results)

    def search_by_document_type(
        self,
        doc_type: str,
        max_results: int = DEFAULT_MAX_RECORDS,
        start_record: int = DEFAULT_START_RECORD
    ) -> Dict[str, Any]:
        """
        Search for documents in the Gallica digital library by document type.

        Args:
            doc_type: The document type to search for (e.g., monographie, periodique, image, manuscrit, carte, musique, etc.)
            max_results: Maximum number of results to return (1-50)
            start_record: Starting record for pagination

        Returns:
            Dictionary containing search results and metadata
        """
        query = self._field_query('dc.type', doc_type)
        return self.gallica_api.search(query, start_record, max_results)

    def advanced_search(
        self,
        query: str,
        max_results: int = DEFAULT_MAX_RECORDS,
        start_record: int = DEFAULT_START_RECORD
    ) -> Dict[str, Any]:
        """
        Perform an advanced search using custom CQL query syntax.

        The query is passed through unchanged, so the caller is responsible
        for quoting and escaping the terms it contains.
        Examples:
        - Search for books by Victor Hugo: dc.creator all "Victor Hugo" and dc.type all "monographie"
        - Search for maps about Paris: dc.subject all "Paris" and dc.type all "carte"
        - Search for documents in English: dc.language all "eng"

        Args:
            query: Custom CQL query string
            max_results: Maximum number of results to return (1-50)
            start_record: Starting record for pagination

        Returns:
            Dictionary containing search results and metadata
        """
        return self.gallica_api.search(query, start_record, max_results)

    def natural_language_search(
        self,
        query: str,
        max_results: int = DEFAULT_MAX_RECORDS,
        start_record: int = DEFAULT_START_RECORD
    ) -> Dict[str, Any]:
        """
        Search the Gallica digital library using natural language.

        This is a simplified search that uses the 'gallica all' operator to search across all fields.
        It's the most user-friendly way to search but may not be as precise as the other search methods.

        Args:
            query: Natural language search query
            max_results: Maximum number of results to return (1-50)
            start_record: Starting record for pagination

        Returns:
            Dictionary containing search results and metadata
        """
        formatted_query = self._field_query('gallica', query)
        return self.gallica_api.search(formatted_query, start_record, max_results)
@@ -0,0 +1,683 @@
1
+ """
2
+ BnF Sequential Reporting Tool
3
+ ----------------------------
4
+ This module provides a tool for generating structured reports based on research
5
+ from the Gallica BnF digital library. It uses a sequential approach to gather sources,
6
+ analyze them, and generate a comprehensive report with proper citations.
7
+ """
8
+
9
+ import json
10
+ import sys
11
+ import logging
12
+ from dataclasses import dataclass
13
+ from typing import Dict, List, Optional, Any, Union
14
+ from datetime import datetime
15
+ import textwrap
16
+
17
+ from .api import GallicaAPI
18
+ from .search import SearchAPI
19
+
20
+ # Set up logging
21
+ logger = logging.getLogger(__name__)
22
+
23
+ # Constants
24
+ DEFAULT_PAGE_COUNT = 4
25
+ DEFAULT_SOURCE_COUNT = 10
26
+
27
+
28
@dataclass
class ReportSection:
    """
    Represents a section of the sequential report.
    """
    # Position of this section within the report
    section_number: int
    # Number of sections the report is expected to contain
    total_sections: int
    # Body text of the section
    content: str
    # Heading of the section
    title: str
    # True when the section is the bibliography rather than report content
    is_bibliography: bool = False
    # Presumably the ids of the sources cited in this section (TODO confirm
    # against callers); None when not provided
    sources_used: Optional[List[int]] = None
    # False once no further section should follow this one
    next_section_needed: bool = True
40
+
41
+
42
class SequentialReportingServer:
    """
    Server for generating sequential reports based on BnF research.

    The topic, sources, graphics, plan and recorded sections are kept on the
    instance and are reset whenever process_section() receives a new topic.
    """

    def __init__(self, gallica_api: GallicaAPI, search_api: SearchAPI):
        """
        Initialize the Sequential Reporting Server.

        Args:
            gallica_api: An initialized GallicaAPI instance
            search_api: An initialized SearchAPI instance
        """
        self.gallica_api = gallica_api
        self.search_api = search_api
        self.topic = None
        self.page_count = DEFAULT_PAGE_COUNT
        self.source_count = DEFAULT_SOURCE_COUNT
        self.sources = []
        self.report_sections = []
        self.plan = None
        self._current_step = 0
        self.include_graphics = False
        self.graphics = []

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _as_text(value: Any, separator: str = ', ') -> str:
        """Render a metadata value as one string; multi-valued fields arrive as lists."""
        if isinstance(value, (list, tuple)):
            return separator.join(str(item) for item in value)
        return str(value)

    @staticmethod
    def _positive_int(value: Any, default: int) -> int:
        """Convert value to a positive int, falling back to default when it is invalid or < 1."""
        try:
            number = int(value)
        except (ValueError, TypeError):
            return default
        return number if number > 0 else default

    @staticmethod
    def _section_int(value: Any, label: str) -> int:
        """Accept an int or a string of digits; raise ValueError naming the field otherwise."""
        if isinstance(value, str) and value.isdigit():
            return int(value)
        if isinstance(value, int):
            return value
        raise ValueError(f"Invalid {label}: must be a number")

    @staticmethod
    def _cql_escape(term: str) -> str:
        """Escape backslashes and double quotes so the term can sit inside a quoted CQL string."""
        return str(term).replace('\\', '\\\\').replace('"', '\\"')

    @staticmethod
    def _gallica_links(gallica_url: str):
        """Return (document URL without a /thumbnail suffix, thumbnail URL or '')."""
        if not gallica_url:
            return '', ''
        url = gallica_url.replace('/thumbnail', '')
        ark_id = url.split('ark:', 1)[1] if 'ark:' in url else ''
        thumbnail = f"https://gallica.bnf.fr/ark:{ark_id}/thumbnail" if ark_id else ''
        return url, thumbnail

    @staticmethod
    def _unique_records(records: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Drop records whose 'gallica_url' was already seen; records without one are kept."""
        seen = set()
        unique = []
        for record in records:
            key = record.get('gallica_url')
            if key:
                if key in seen:
                    continue
                seen.add(key)
            unique.append(record)
        return unique

    # ------------------------------------------------------------------
    # Validation
    # ------------------------------------------------------------------

    def validate_section_data(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Validate the input data for a section.

        Three request shapes are recognised, checked in this order:
        a 'topic' (initialization), a truthy 'search_sources' flag, or
        section data with 'section_number' and 'total_sections'.

        Args:
            input_data: The input data for the section

        Returns:
            Validated input data

        Raises:
            ValueError: If the input is not a dictionary, a required section
                field is missing, or a field has an invalid type
        """
        if not isinstance(input_data, dict):
            raise ValueError("Invalid input: expected a dictionary")

        # Handle initialization with topic
        if 'topic' in input_data:
            validated_data = {'topic': str(input_data['topic'])}
            if 'page_count' in input_data:
                validated_data['page_count'] = self._positive_int(
                    input_data['page_count'], DEFAULT_PAGE_COUNT)
            if 'source_count' in input_data:
                validated_data['source_count'] = self._positive_int(
                    input_data['source_count'], DEFAULT_SOURCE_COUNT)
            if 'include_graphics' in input_data:
                validated_data['include_graphics'] = bool(input_data['include_graphics'])
            return validated_data

        # Handle search_sources flag
        if input_data.get('search_sources'):
            return {'search_sources': True}

        # Check if required fields are present for section data
        for field in ('section_number', 'total_sections'):
            if field not in input_data:
                raise ValueError(f"Missing required field: {field}")

        section_number = self._section_int(input_data['section_number'], 'sectionNumber')
        total_sections = self._section_int(input_data['total_sections'], 'totalSections')

        # None is treated like a missing value for content and sources_used
        content = input_data.get('content', '')
        if content is None:
            content = ''
        if not isinstance(content, str):
            raise ValueError("Invalid content: must be a string")

        sources_used = input_data.get('sources_used', [])
        if sources_used is None:
            sources_used = []

        return {
            'section_number': section_number,
            'total_sections': total_sections,
            'title': input_data.get('title', f"Section {section_number}"),
            'content': content,
            'is_bibliography': input_data.get('is_bibliography', False),
            'sources_used': sources_used,
            'next_section_needed': input_data.get('next_section_needed', True)
        }

    # ------------------------------------------------------------------
    # Searching
    # ------------------------------------------------------------------

    def search_sources(self, topic: str, source_count: int = DEFAULT_SOURCE_COUNT) -> List[Dict[str, Any]]:
        """
        Search for sources on the given topic.

        A natural language search is tried first; when it yields fewer than
        source_count records, a subject search fills the gap.

        Args:
            topic: The topic to search for
            source_count: The number of sources to retrieve

        Returns:
            List of sources as dictionaries (empty if the search failed)
        """
        try:
            results = self.search_api.natural_language_search(topic, max_results=source_count)
            records = list(results.get('records', []))

            missing = source_count - len(records)
            if missing > 0:
                subject_results = self.search_api.search_by_subject(topic, max_results=missing)
                records.extend(subject_results.get('records', []))

            sources = []
            # Both searches can return the same document, so de-duplicate
            # before numbering.
            for source_id, record in enumerate(self._unique_records(records)[:source_count], 1):
                # Records carry their link under 'gallica_url'; 'url' and
                # 'thumbnail' are honoured when present but otherwise derived.
                url, thumbnail = self._gallica_links(record.get('gallica_url', ''))
                sources.append({
                    'id': source_id,
                    'title': record.get('title', 'Unknown Title'),
                    'creator': record.get('creator', 'Unknown Author'),
                    'date': record.get('date', 'Unknown Date'),
                    'type': record.get('type', 'Unknown Type'),
                    'language': record.get('language', 'Unknown Language'),
                    'url': record.get('url') or url,
                    'citation': self._format_citation(record),
                    'thumbnail': record.get('thumbnail') or thumbnail
                })

            return sources
        except Exception as e:
            # Best effort: report through the logger (not stdout) and let the
            # caller continue with no sources.
            logger.error("Error searching for sources: %s", e)
            return []

    def _typed_search(self, main_keyword: str, topic: str, doc_type: str, count: int) -> Dict[str, Any]:
        """Search one document type by the first keyword, then by the full topic if nothing matched."""
        query = f'gallica all "{self._cql_escape(main_keyword)}" and dc.type all "{doc_type}"'
        results = self.search_api.advanced_search(query, max_results=count)
        if not results.get('records', []) and topic != main_keyword:
            query = f'gallica all "{self._cql_escape(topic)}" and dc.type all "{doc_type}"'
            results = self.search_api.advanced_search(query, max_results=count)
        return results

    def _graphic_from_record(self, record: Dict[str, Any], graphic_id: int,
                             kind: str, label: str, topic: str) -> Dict[str, Any]:
        """Build one graphic entry ('image' or 'map') from a search record."""
        url, thumbnail = self._gallica_links(record.get('gallica_url', ''))
        # Titles can be multi-valued; flatten so later string handling is safe
        title = self._as_text(record.get('title', f'Untitled {label}'), '; ')
        return {
            'id': graphic_id,
            'title': title,
            'description': f"{label} related to {topic}: {title}",
            'type': kind,
            'url': url,
            'thumbnail': thumbnail
        }

    def search_graphics(self, topic: str, count: int = 5) -> List[Dict[str, Any]]:
        """
        Search for graphics (images, maps) related to the topic.

        Args:
            topic: The topic to search for
            count: The number of graphics to retrieve

        Returns:
            List of graphics as dictionaries (empty if the search failed).
            When nothing is found, two generic placeholder entries pointing
            at the Gallica home page are returned instead.
        """
        try:
            # The first word alone is used as a broader search term
            keywords = topic.split()
            main_keyword = keywords[0] if keywords else topic

            image_results = self._typed_search(main_keyword, topic, 'image', count)
            map_results = self._typed_search(main_keyword, topic, 'carte', count)

            # If still no results, try a more general search for any visual material
            if not image_results.get('records', []) and not map_results.get('records', []):
                general_query = (
                    f'gallica all "{self._cql_escape(main_keyword)}" and '
                    '(dc.type all "image" or dc.type all "carte" or dc.type all "estampe")'
                )
                image_results = self.search_api.advanced_search(general_query, max_results=count)

            graphics = []
            for record in image_results.get('records', []):
                graphics.append(
                    self._graphic_from_record(record, len(graphics) + 1, 'image', 'Image', topic))
            for record in map_results.get('records', []):
                graphics.append(
                    self._graphic_from_record(record, len(graphics) + 1, 'map', 'Map', topic))

            # If we still have no graphics, create some placeholder graphics with generic URLs
            if not graphics:
                graphics = [
                    {
                        'id': 1,
                        'title': f"Illustration related to {topic}",
                        'description': f"Illustration related to {topic}",
                        'type': 'image',
                        'url': 'https://gallica.bnf.fr/',
                        'thumbnail': 'https://gallica.bnf.fr/themes/gallica2015/images/logo-gallica.png'
                    },
                    {
                        'id': 2,
                        'title': f"Map related to {topic}",
                        'description': f"Map related to {topic}",
                        'type': 'map',
                        'url': 'https://gallica.bnf.fr/',
                        'thumbnail': 'https://gallica.bnf.fr/themes/gallica2015/images/logo-gallica.png'
                    }
                ]

            return graphics[:count]
        except Exception as e:
            # Best effort: report through the logger (not stdout)
            logger.error("Error searching for graphics: %s", e)
            return []

    # ------------------------------------------------------------------
    # Request processing
    # ------------------------------------------------------------------

    def process_section(self, input_data: Any) -> Dict[str, Any]:
        """
        Process a report section following a sequential approach.

        Depending on the validated input this initializes a new report,
        searches for sources, or records one section.

        Args:
            input_data: The input data for the section

        Returns:
            Response data as a dictionary whose 'content' holds one text
            item; failures are returned with 'isError' set, not raised
        """
        try:
            data = self.validate_section_data(input_data)

            if 'topic' in data:
                return self._start_report(data)
            if data.get('search_sources', False):
                return self._collect_sources()
            return self._record_section(data)

        except Exception as error:
            logger.error("Error processing report section: %s", error)
            return {
                'content': [{
                    'text': json.dumps({
                        'error': str(error),
                        'status': 'failed'
                    })
                }],
                'isError': True
            }

    def _start_report(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Reset the report state for a new topic and return the generated plan."""
        self.topic = data['topic']
        self.page_count = data.get('page_count', DEFAULT_PAGE_COUNT)
        self.source_count = data.get('source_count', DEFAULT_SOURCE_COUNT)
        self.include_graphics = data.get('include_graphics', False)
        self.sources = []
        self.graphics = []
        self.report_sections = []
        self._current_step = 0

        self.plan = self.create_plan(self.topic, self.page_count)

        return {
            'content': [{
                'text': json.dumps({
                    'topic': self.topic,
                    'pageCount': self.page_count,
                    'sourceCount': self.source_count,
                    'includeGraphics': self.include_graphics,
                    'plan': self.plan,
                    'nextStep': 'Search for sources using natural_language_search or search_by_subject'
                })
            }]
        }

    def _collect_sources(self) -> Dict[str, Any]:
        """Search sources (and graphics when enabled) for the current topic."""
        if not self.topic:
            return {'content': [{'text': 'Error: No topic specified. Please initialize with a topic first.'}]}

        self.sources = self.search_sources(self.topic, self.source_count)

        if self.include_graphics:
            self.graphics = self.search_graphics(self.topic, count=5)

        self._current_step = 1

        return {
            'content': [{
                'text': json.dumps({
                    'sources': self.sources,
                    'graphics': self.graphics if self.include_graphics else [],
                    'nextStep': 'Create bibliography section'
                })
            }]
        }

    def _record_section(self, section: Dict[str, Any]) -> Dict[str, Any]:
        """Store one validated section, echo it to stderr and report progress."""
        # Adjust total sections if needed
        if section['section_number'] > section['total_sections']:
            section['total_sections'] = section['section_number']

        self.report_sections.append(section)

        # The formatted box is written to stderr, not returned to the caller
        print(self.format_section(section), file=sys.stderr)

        if self.plan:
            self.plan["current_section"] = section['section_number']
            if section['section_number'] < len(self.plan["sections"]):
                next_section_title = self.plan["sections"][section['section_number']]["title"]
                next_step = f"Create section {section['section_number'] + 1}: {next_section_title}"
            else:
                next_step = "Report complete"
        else:
            next_step = "Continue writing the report"
            if not section['next_section_needed']:
                next_step = "Report complete"

        # Guard the division: a total of 0 would otherwise fail the request
        total = section['total_sections']
        progress = (len(self.report_sections) / total) * 100 if total > 0 else 0.0

        return {
            'content': [{
                'text': json.dumps({
                    'sectionNumber': section['section_number'],
                    'totalSections': section['total_sections'],
                    'nextSectionNeeded': section['next_section_needed'],
                    'progress': f"{progress:.1f}%",
                    'reportSectionsCount': len(self.report_sections),
                    'nextStep': next_step,
                    'sources': self.sources if section['is_bibliography'] else None
                })
            }]
        }

    # ------------------------------------------------------------------
    # Formatting and planning
    # ------------------------------------------------------------------

    def format_section(self, section: Dict[str, Any]) -> str:
        """
        Format a report section for display.

        Args:
            section: The report section to format

        Returns:
            Formatted section as a string (an 80-column text box)
        """
        section_number = section.get('section_number', 0)
        total_sections = section.get('total_sections', 0)
        title = self._as_text(section.get('title', 'Untitled'), '; ')
        is_bibliography = section.get('is_bibliography', False)

        width = 80
        # Yellow for bibliography, blue for content
        color, symbol = ("\033[93m", "📚") if is_bibliography else ("\033[94m", "📄")
        reset = "\033[0m"
        header = f" {color}{symbol}{reset} Section{section_number}/{total_sections}: {title} "
        # The colour codes occupy no columns, so leave them out of the padding
        visible_length = len(header) - len(color) - len(reset)

        def row(text: str) -> str:
            """Pad one line of text to the inner width of the box."""
            return "│ " + text + " " * (width - len(text) - 4) + " │"

        rule = "─" * (width - 2)
        lines = [
            "┌" + rule + "┐",
            "│" + header + " " * (width - visible_length - 2) + "│",
            "├" + rule + "┤",
        ]

        content = section.get('content', '')
        if content:
            lines.extend(row(line) for line in textwrap.wrap(content, width=width - 4))

        # Add graphics if available and this is not a bibliography
        if not is_bibliography and self.include_graphics and self.graphics:
            # Simple relevance check: any word of the section title appears
            # in the graphic's title
            title_terms = title.lower().split()
            section_graphics = [
                graphic for graphic in self.graphics
                if any(term in self._as_text(graphic['title'], '; ').lower() for term in title_terms)
            ]

            # Add up to 2 graphics for this section
            if section_graphics:
                lines.append(row(""))
                lines.append(row("Graphics:"))
                for graphic in section_graphics[:2]:
                    description = f"- {graphic['description']}"
                    lines.extend(row(line) for line in textwrap.wrap(description, width=width - 4))
                    lines.append(row(f"  URL: {graphic['url']}"))

        lines.append("└" + rule + "┘")
        return "\n".join(lines)

    def create_plan(self, topic: str, page_count: int = DEFAULT_PAGE_COUNT) -> Dict[str, Any]:
        """
        Create a sequential plan for the report based on the topic.

        Args:
            topic: The research topic
            page_count: Number of pages to generate

        Returns:
            A plan dictionary with sections and steps
        """
        # Calculate number of sections based on page count (1 page ≈ 2 sections + bibliography)
        total_sections = min(page_count * 2 + 1, 20)  # Cap at 20 sections

        titles = ["Bibliography", "Introduction"]
        if page_count >= 2:
            titles.append("Historical Context")
        if page_count >= 3:
            titles.extend(["Main Analysis", "Key Findings"])
        if page_count >= 4:
            titles.extend(["Detailed Examination", "Critical Perspectives"])

        # Fill up to the target while reserving one slot for the conclusion,
        # so the finished plan does not exceed total_sections
        filler_count = total_sections - len(titles) - 1
        titles.extend(f"Additional Analysis {i + 1}" for i in range(filler_count))

        # Always end with conclusion
        titles.append("Conclusion")

        sections = [
            {"title": title, "is_bibliography": title == "Bibliography"}
            for title in titles
        ]

        return {
            "topic": topic,
            "total_sections": len(sections),
            "sections": sections,
            "current_section": 0,
            "steps": [
                "Initialize with topic",
                "Search for sources",
                "Create bibliography",
                "Write introduction",
                "Develop content sections",
                "Write conclusion"
            ],
            "current_step": 0,
            "next_step": "Search for sources"
        }

    def _format_citation(self, record: Dict[str, Any]) -> str:
        """
        Format a record as a citation.

        Args:
            record: The record to format

        Returns:
            Formatted citation as a string
        """
        # Any field can be multi-valued (a list); flatten each to one string
        creator = self._as_text(record.get('creator', 'Unknown Author'))
        title = self._as_text(record.get('title', 'Unknown Title'), '; ')
        publisher = self._as_text(record.get('publisher', 'Unknown Publisher'))
        date = self._as_text(record.get('date', 'n.d.'))
        url = self._as_text(record.get('gallica_url', record.get('identifier', 'No URL available')))

        doc_type = self._as_text(record.get('type', ''), ' ').lower()

        # Periodicals and articles are cited without a publisher; every other
        # type (books included) is cited with one
        is_book = 'monographie' in doc_type or 'book' in doc_type
        is_periodical = 'periodique' in doc_type or 'article' in doc_type
        if is_periodical and not is_book:
            return f"{creator}. ({date}). {title}. Retrieved from {url}"
        return f"{creator}. ({date}). {title}. {publisher}. Retrieved from {url}"
573
+
574
+
575
# Tool definition
# Descriptor for the "bnf_sequential_reporting" tool: a name, a long
# human-readable description, and a JSON-Schema-style "inputSchema" describing
# the accepted arguments.
# NOTE(review): presumably registered with the MCP server elsewhere - the
# consumer of this dict is not visible here; confirm against the server module.
BNF_SEQUENTIAL_REPORTING_TOOL = {
    "name": "bnf_sequential_reporting",
    "description": """A tool for generating comprehensive research reports using the Gallica BnF digital library.
This tool helps create well-structured, properly cited reports on any topic by breaking the process into sequential steps.

When to use this tool:
- Creating research reports on historical, literary, or cultural topics
- Generating academic papers with proper citations
- Compiling information from multiple Gallica sources into a cohesive document
- Producing educational materials based on primary and secondary sources

Key features:
- Automatically searches for relevant sources in the Gallica digital library
- Creates properly formatted citations in a bibliography
- Generates reports with a specified number of pages (default: 4)
- Supports sequential writing of report sections
- Includes in-text citations in the format [1], [2], etc.
- Maintains context across multiple sections

How it works:
1. First, provide a topic and optional configuration parameters
2. The tool searches for relevant sources in the Gallica digital library
3. Start by creating the bibliography as the first section
4. Then write each section of the report sequentially
5. Include in-text citations to reference sources from the bibliography
6. Continue until the report is complete

Parameters explained:
- topic: The research topic for the report (only needed for initialization)
- pageCount: Number of pages to generate (default: 4)
- sourceCount: Number of sources to find (default: 10)
- sectionNumber: Current section number in sequence
- totalSections: Total number of sections in the report
- title: Title of the current section
- content: The content of the current section
- isBibliography: Whether this section is the bibliography
- sourcesUsed: List of source IDs used in this section
- nextSectionNeeded: Whether another section is needed
- includeGraphics: Whether to include graphics in the report (default: False)

You should:
1. Start by providing a topic to initialize the research
2. Create the bibliography first as section 1
3. Write each section sequentially, including in-text citations [1], [2], etc.
4. Ensure each section builds on previous ones to create a cohesive report
5. Include a conclusion in the final section
6. Set nextSectionNeeded to false when the report is complete""",
    # Argument schema: property names are camelCase and match the
    # "Parameters explained" list in the description above.
    "inputSchema": {
        "type": "object",
        "properties": {
            # Report-level configuration; all three are optional.
            "topic": {
                "type": "string",
                "description": "Research topic for the report (only needed for initialization)"
            },
            "pageCount": {
                "type": "integer",
                "description": "Number of pages to generate",
                "minimum": 1,
                "default": 4
            },
            "sourceCount": {
                "type": "integer",
                "description": "Number of sources to find",
                "minimum": 1,
                "default": 10
            },
            # Per-section fields; section numbering starts at 1 (minimum: 1).
            "sectionNumber": {
                "type": "integer",
                "description": "Current section number",
                "minimum": 1
            },
            "totalSections": {
                "type": "integer",
                "description": "Total sections in the report",
                "minimum": 1
            },
            "title": {
                "type": "string",
                "description": "Title of the current section"
            },
            "content": {
                "type": "string",
                "description": "Content of the current section"
            },
            "isBibliography": {
                "type": "boolean",
                "description": "Whether this section is the bibliography"
            },
            "sourcesUsed": {
                "type": "array",
                "items": {
                    "type": "integer"
                },
                "description": "List of source IDs used in this section"
            },
            "nextSectionNeeded": {
                "type": "boolean",
                "description": "Whether another section is needed"
            },
            "includeGraphics": {
                "type": "boolean",
                "description": "Whether to include graphics in the report",
                "default": False
            }
        },
        # Only the per-section fields below are mandatory; topic, pageCount,
        # sourceCount, isBibliography, sourcesUsed and includeGraphics are
        # optional.
        "required": ["sectionNumber", "totalSections", "title", "content", "nextSectionNeeded"]
    }
}
@@ -0,0 +1,8 @@
1
+ Metadata-Version: 2.4
2
+ Name: iflow-mcp_mcp-bibliotheque_nationale_de_france
3
+ Version: 0.1.0
4
+ Summary: MCP server for accessing the Gallica digital library of the Bibliothèque nationale de France (BnF)
5
+ Author-email: iflow-mcp <iflow-mcp@example.com>
6
+ Requires-Python: >=3.8
7
+ Requires-Dist: requests==2.31.0
8
+ Requires-Dist: fastmcp==0.1.0
@@ -0,0 +1,10 @@
1
+ bnf_api/__init__.py,sha256=MrswwsMEz5SLsGbkO1GFjuDpqdCAM9uEYMz3pG0IYyE,514
2
+ bnf_api/api.py,sha256=bmkVldzyRwbpK33cCcr5YiO1h7LqXsgYAqcsgz291Sg,5723
3
+ bnf_api/config.py,sha256=fTSoqpc1jeBEESMwslA90NEvPZUbg5XE7vBKwUSTqZU,511
4
+ bnf_api/search.py,sha256=MpLB9DLVPdZAVi7QBI5ZgO8OAqqPwsLO07W576SngJ8,6867
5
+ bnf_api/sequential_reporting.py,sha256=SUW_E2Binf4nN91awENCjq-GT5qXkhGNqZzsXkDyjoI,28580
6
+ iflow_mcp_mcp_bibliotheque_nationale_de_france-0.1.0.dist-info/METADATA,sha256=1nI6ObgBiNSVDlPw3VBi4CM1Q7P42qwpxOQNrR5BK0A,332
7
+ iflow_mcp_mcp_bibliotheque_nationale_de_france-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
8
+ iflow_mcp_mcp_bibliotheque_nationale_de_france-0.1.0.dist-info/entry_points.txt,sha256=ns5aJirazUxaubRuzmOZafplNnqPSd2B2ARIF-7sOXI,73
9
+ iflow_mcp_mcp_bibliotheque_nationale_de_france-0.1.0.dist-info/top_level.txt,sha256=BeNw1ib6gAYJBqMhoWYY_1aAVxOgVQUwteI5oLAcelY,8
10
+ iflow_mcp_mcp_bibliotheque_nationale_de_france-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ mcp-bibliotheque_nationale_de_france = bnf_server:main