docling 2.14.0__py3-none-any.whl → 2.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,10 +37,10 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
37
37
 
38
38
  try:
39
39
  if isinstance(self.path_or_stream, BytesIO):
40
- text_stream = self.path_or_stream.getvalue().decode("utf-8")
40
+ text_stream = self.path_or_stream.getvalue()
41
41
  self.soup = BeautifulSoup(text_stream, "html.parser")
42
42
  if isinstance(self.path_or_stream, Path):
43
- with open(self.path_or_stream, "r", encoding="utf-8") as f:
43
+ with open(self.path_or_stream, "rb") as f:
44
44
  html_content = f.read()
45
45
  self.soup = BeautifulSoup(html_content, "html.parser")
46
46
  except Exception as e:
@@ -16,7 +16,7 @@ from docling_core.types.doc import (
16
16
  TableCell,
17
17
  TableData,
18
18
  )
19
- from PIL import Image
19
+ from PIL import Image, UnidentifiedImageError
20
20
  from pptx import Presentation
21
21
  from pptx.enum.shapes import MSO_SHAPE_TYPE, PP_PLACEHOLDER
22
22
 
@@ -120,6 +120,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
120
120
  bullet_type = "None"
121
121
  list_text = ""
122
122
  list_label = GroupLabel.LIST
123
+ doc_label = DocItemLabel.LIST_ITEM
123
124
  prov = self.generate_prov(shape, slide_ind, shape.text.strip())
124
125
 
125
126
  # Identify if shape contains lists
@@ -276,16 +277,19 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
276
277
  im_dpi, _ = image.dpi
277
278
 
278
279
  # Open it with PIL
279
- pil_image = Image.open(BytesIO(image_bytes))
280
-
281
- # shape has picture
282
- prov = self.generate_prov(shape, slide_ind, "")
283
- doc.add_picture(
284
- parent=parent_slide,
285
- image=ImageRef.from_pil(image=pil_image, dpi=im_dpi),
286
- caption=None,
287
- prov=prov,
288
- )
280
+ try:
281
+ pil_image = Image.open(BytesIO(image_bytes))
282
+
283
+ # shape has picture
284
+ prov = self.generate_prov(shape, slide_ind, "")
285
+ doc.add_picture(
286
+ parent=parent_slide,
287
+ image=ImageRef.from_pil(image=pil_image, dpi=im_dpi),
288
+ caption=None,
289
+ prov=prov,
290
+ )
291
+ except (UnidentifiedImageError, OSError) as e:
292
+ _log.warning(f"Warning: image cannot be loaded by Pillow: {e}")
289
293
  return
290
294
 
291
295
  def handle_tables(self, shape, parent_slide, slide_ind, doc):
docling/cli/main.py CHANGED
@@ -164,6 +164,11 @@ def convert(
164
164
  to_formats: List[OutputFormat] = typer.Option(
165
165
  None, "--to", help="Specify output formats. Defaults to Markdown."
166
166
  ),
167
+ headers: str = typer.Option(
168
+ None,
169
+ "--headers",
170
+ help="Specify http request headers used when fetching url input sources in the form of a JSON string",
171
+ ),
167
172
  image_export_mode: Annotated[
168
173
  ImageRefMode,
169
174
  typer.Option(
@@ -279,12 +284,19 @@ def convert(
279
284
  if from_formats is None:
280
285
  from_formats = [e for e in InputFormat]
281
286
 
287
+ parsed_headers: Optional[Dict[str, str]] = None
288
+ if headers is not None:
289
+ headers_t = TypeAdapter(Dict[str, str])
290
+ parsed_headers = headers_t.validate_json(headers)
291
+
282
292
  with tempfile.TemporaryDirectory() as tempdir:
283
293
  input_doc_paths: List[Path] = []
284
294
  for src in input_sources:
285
295
  try:
286
296
  # check if we can fetch some remote url
287
- source = resolve_source_to_path(source=src, workdir=Path(tempdir))
297
+ source = resolve_source_to_path(
298
+ source=src, headers=parsed_headers, workdir=Path(tempdir)
299
+ )
288
300
  input_doc_paths.append(source)
289
301
  except FileNotFoundError:
290
302
  err_console.print(
@@ -390,7 +402,7 @@ def convert(
390
402
  start_time = time.time()
391
403
 
392
404
  conv_results = doc_converter.convert_all(
393
- input_doc_paths, raises_on_error=abort_on_error
405
+ input_doc_paths, headers=parsed_headers, raises_on_error=abort_on_error
394
406
  )
395
407
 
396
408
  output.mkdir(parents=True, exist_ok=True)
@@ -227,13 +227,18 @@ class _DummyBackend(AbstractDocumentBackend):
227
227
  class _DocumentConversionInput(BaseModel):
228
228
 
229
229
  path_or_stream_iterator: Iterable[Union[Path, str, DocumentStream]]
230
+ headers: Optional[Dict[str, str]] = None
230
231
  limits: Optional[DocumentLimits] = DocumentLimits()
231
232
 
232
233
  def docs(
233
234
  self, format_options: Dict[InputFormat, "FormatOption"]
234
235
  ) -> Iterable[InputDocument]:
235
236
  for item in self.path_or_stream_iterator:
236
- obj = resolve_source_to_stream(item) if isinstance(item, str) else item
237
+ obj = (
238
+ resolve_source_to_stream(item, self.headers)
239
+ if isinstance(item, str)
240
+ else item
241
+ )
237
242
  format = self._guess_format(obj)
238
243
  backend: Type[AbstractDocumentBackend]
239
244
  if format not in format_options.keys():
@@ -176,6 +176,7 @@ class DocumentConverter:
176
176
  def convert(
177
177
  self,
178
178
  source: Union[Path, str, DocumentStream], # TODO review naming
179
+ headers: Optional[Dict[str, str]] = None,
179
180
  raises_on_error: bool = True,
180
181
  max_num_pages: int = sys.maxsize,
181
182
  max_file_size: int = sys.maxsize,
@@ -185,6 +186,7 @@ class DocumentConverter:
185
186
  raises_on_error=raises_on_error,
186
187
  max_num_pages=max_num_pages,
187
188
  max_file_size=max_file_size,
189
+ headers=headers,
188
190
  )
189
191
  return next(all_res)
190
192
 
@@ -192,6 +194,7 @@ class DocumentConverter:
192
194
  def convert_all(
193
195
  self,
194
196
  source: Iterable[Union[Path, str, DocumentStream]], # TODO review naming
197
+ headers: Optional[Dict[str, str]] = None,
195
198
  raises_on_error: bool = True, # True: raises on first conversion error; False: does not raise on conv error
196
199
  max_num_pages: int = sys.maxsize,
197
200
  max_file_size: int = sys.maxsize,
@@ -201,8 +204,7 @@ class DocumentConverter:
201
204
  max_file_size=max_file_size,
202
205
  )
203
206
  conv_input = _DocumentConversionInput(
204
- path_or_stream_iterator=source,
205
- limits=limits,
207
+ path_or_stream_iterator=source, limits=limits, headers=headers
206
208
  )
207
209
  conv_res_iter = self._convert(conv_input, raises_on_error=raises_on_error)
208
210
 
@@ -138,18 +138,31 @@ class BaseOcrModel(BasePageModel):
138
138
 
139
139
  def draw_ocr_rects_and_cells(self, conv_res, page, ocr_rects, show: bool = False):
140
140
  image = copy.deepcopy(page.image)
141
+ scale_x = image.width / page.size.width
142
+ scale_y = image.height / page.size.height
143
+
141
144
  draw = ImageDraw.Draw(image, "RGBA")
142
145
 
143
146
  # Draw OCR rectangles as yellow filled rect
144
147
  for rect in ocr_rects:
145
148
  x0, y0, x1, y1 = rect.as_tuple()
149
+ y0 *= scale_x
150
+ y1 *= scale_y
151
+ x0 *= scale_x
152
+ x1 *= scale_x
153
+
146
154
  shade_color = (255, 255, 0, 40) # transparent yellow
147
155
  draw.rectangle([(x0, y0), (x1, y1)], fill=shade_color, outline=None)
148
156
 
149
157
  # Draw OCR and programmatic cells
150
158
  for tc in page.cells:
151
159
  x0, y0, x1, y1 = tc.bbox.as_tuple()
152
- color = "red"
160
+ y0 *= scale_x
161
+ y1 *= scale_y
162
+ x0 *= scale_x
163
+ x1 *= scale_x
164
+
165
+ color = "gray"
153
166
  if isinstance(tc, OcrCell):
154
167
  color = "magenta"
155
168
  draw.rectangle([(x0, y0), (x1, y1)], outline=color)
@@ -67,29 +67,9 @@ class LayoutModel(BasePageModel):
67
67
  - Right: Clusters including FORM, KEY_VALUE_REGION, and PICTURE.
68
68
  Includes label names and confidence scores for each cluster.
69
69
  """
70
- label_to_color = {
71
- DocItemLabel.TEXT: (255, 255, 153), # Light Yellow
72
- DocItemLabel.CAPTION: (255, 204, 153), # Light Orange
73
- DocItemLabel.LIST_ITEM: (153, 153, 255), # Light Purple
74
- DocItemLabel.FORMULA: (192, 192, 192), # Gray
75
- DocItemLabel.TABLE: (255, 204, 204), # Light Pink
76
- DocItemLabel.PICTURE: (255, 204, 164), # Light Beige
77
- DocItemLabel.SECTION_HEADER: (255, 153, 153), # Light Red
78
- DocItemLabel.PAGE_HEADER: (204, 255, 204), # Light Green
79
- DocItemLabel.PAGE_FOOTER: (
80
- 204,
81
- 255,
82
- 204,
83
- ), # Light Green (same as Page-Header)
84
- DocItemLabel.TITLE: (255, 153, 153), # Light Red (same as Section-Header)
85
- DocItemLabel.FOOTNOTE: (200, 200, 255), # Light Blue
86
- DocItemLabel.DOCUMENT_INDEX: (220, 220, 220), # Light Gray
87
- DocItemLabel.CODE: (125, 125, 125), # Gray
88
- DocItemLabel.CHECKBOX_SELECTED: (255, 182, 193), # Pale Green
89
- DocItemLabel.CHECKBOX_UNSELECTED: (255, 182, 193), # Light Pink
90
- DocItemLabel.FORM: (200, 255, 255), # Light Cyan
91
- DocItemLabel.KEY_VALUE_REGION: (183, 65, 14), # Rusty orange
92
- }
70
+ scale_x = page.image.width / page.size.width
71
+ scale_y = page.image.height / page.size.height
72
+
93
73
  # Filter clusters for left and right images
94
74
  exclude_labels = {
95
75
  DocItemLabel.FORM,
@@ -118,6 +98,11 @@ class LayoutModel(BasePageModel):
118
98
  cell_color = (0, 0, 0, 40) # Transparent black for cells
119
99
  for tc in c.cells:
120
100
  cx0, cy0, cx1, cy1 = tc.bbox.as_tuple()
101
+ cx0 *= scale_x
102
+ cx1 *= scale_x
103
+ cy0 *= scale_x
104
+ cy1 *= scale_y
105
+
121
106
  draw.rectangle(
122
107
  [(cx0, cy0), (cx1, cy1)],
123
108
  outline=None,
@@ -125,8 +110,16 @@ class LayoutModel(BasePageModel):
125
110
  )
126
111
  # Draw cluster rectangle
127
112
  x0, y0, x1, y1 = c.bbox.as_tuple()
128
- cluster_fill_color = (*list(label_to_color.get(c.label)), 70)
129
- cluster_outline_color = (*list(label_to_color.get(c.label)), 255)
113
+ x0 *= scale_x
114
+ x1 *= scale_x
115
+ y0 *= scale_x
116
+ y1 *= scale_y
117
+
118
+ cluster_fill_color = (*list(DocItemLabel.get_color(c.label)), 70)
119
+ cluster_outline_color = (
120
+ *list(DocItemLabel.get_color(c.label)),
121
+ 255,
122
+ )
130
123
  draw.rectangle(
131
124
  [(x0, y0), (x1, y1)],
132
125
  outline=cluster_outline_color,
@@ -66,23 +66,43 @@ class TableStructureModel(BasePageModel):
66
66
  show: bool = False,
67
67
  ):
68
68
  assert page._backend is not None
69
+ assert page.size is not None
69
70
 
70
71
  image = (
71
72
  page._backend.get_page_image()
72
73
  ) # make new image to avoid drawing on the saved ones
74
+
75
+ scale_x = image.width / page.size.width
76
+ scale_y = image.height / page.size.height
77
+
73
78
  draw = ImageDraw.Draw(image)
74
79
 
75
80
  for table_element in tbl_list:
76
81
  x0, y0, x1, y1 = table_element.cluster.bbox.as_tuple()
82
+ y0 *= scale_x
83
+ y1 *= scale_y
84
+ x0 *= scale_x
85
+ x1 *= scale_x
86
+
77
87
  draw.rectangle([(x0, y0), (x1, y1)], outline="red")
78
88
 
79
89
  for cell in table_element.cluster.cells:
80
90
  x0, y0, x1, y1 = cell.bbox.as_tuple()
91
+ x0 *= scale_x
92
+ x1 *= scale_x
93
+ y0 *= scale_x
94
+ y1 *= scale_y
95
+
81
96
  draw.rectangle([(x0, y0), (x1, y1)], outline="green")
82
97
 
83
98
  for tc in table_element.table_cells:
84
99
  if tc.bbox is not None:
85
100
  x0, y0, x1, y1 = tc.bbox.as_tuple()
101
+ x0 *= scale_x
102
+ x1 *= scale_x
103
+ y0 *= scale_x
104
+ y1 *= scale_y
105
+
86
106
  if tc.column_header:
87
107
  width = 3
88
108
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 2.14.0
3
+ Version: 2.15.0
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Home-page: https://github.com/DS4SD/docling
6
6
  License: MIT
@@ -26,7 +26,7 @@ Provides-Extra: tesserocr
26
26
  Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
27
27
  Requires-Dist: certifi (>=2024.7.4)
28
28
  Requires-Dist: deepsearch-glm (>=1.0.0,<2.0.0)
29
- Requires-Dist: docling-core[chunking] (>=2.12.1,<3.0.0)
29
+ Requires-Dist: docling-core[chunking] (>=2.13.1,<3.0.0)
30
30
  Requires-Dist: docling-ibm-models (>=3.1.0,<4.0.0)
31
31
  Requires-Dist: docling-parse (>=3.0.0,<4.0.0)
32
32
  Requires-Dist: easyocr (>=1.7,<2.0)
@@ -4,10 +4,10 @@ docling/backend/abstract_backend.py,sha256=-or6kWVV7egQeyIuN-vI0Tr7Q1htalBZSlhgq
4
4
  docling/backend/asciidoc_backend.py,sha256=kXZxOLk_LvLFVZwnJVVwjmvc3QWZ0iiG7VnwjgtC3hI,14051
5
5
  docling/backend/docling_parse_backend.py,sha256=_jY5f5-KGI3hi5pcZAY6e7tPLocSi5JUWrxraDVszqI,7631
6
6
  docling/backend/docling_parse_v2_backend.py,sha256=1TDUdMIp3fEjCWBNjusUHiCUmH1g6yZQ-b13scofP0Y,8637
7
- docling/backend/html_backend.py,sha256=qbu1W8xoTGnXMuZPRPLq68hDbCEj6ygnpxP5gYaodAQ,15593
7
+ docling/backend/html_backend.py,sha256=O8qXaw7MzOIdaxbBcjHieM9Ce4GEdtBj9YW0vpJspuA,15560
8
8
  docling/backend/md_backend.py,sha256=tmuSCghjor9PqKIiVieCuZ4_t5JEjZMy3cq7u3yTgyU,14032
9
9
  docling/backend/msexcel_backend.py,sha256=23qUEScqr5GhY06xiqg-eBQ_JlAqO0FkPEmX6554sVA,12040
10
- docling/backend/mspowerpoint_backend.py,sha256=QD0NaatTO8U9CIFoiipkq3X5HxLZaaahH8nlrQ6ecDA,15710
10
+ docling/backend/mspowerpoint_backend.py,sha256=kOGawhcn0BFq4M_C6kW0mY8vMIB24_6R6q6GaszbSt0,15957
11
11
  docling/backend/msword_backend.py,sha256=K1D_h0ulLA6KQsPe62327cDVkQqV1f7EetCHo66wCKw,19233
12
12
  docling/backend/pdf_backend.py,sha256=unnw7QiRE1VXg6Pj-eYrtnFGrp5SSYiI324OlFxyv6c,2050
13
13
  docling/backend/pypdfium2_backend.py,sha256=B4bfv-dfzlWiKTfF8LN5fto_99YBu8A2c1_XIVwRUWI,8996
@@ -16,25 +16,25 @@ docling/backend/xml/pubmed_backend.py,sha256=LMnpowjnxa5SydfNC00Ll840BYraL8dCJu-
16
16
  docling/backend/xml/uspto_backend.py,sha256=2YsnB-WRARIAaHPL6gxHePP24GQGi-Up2_K8ZapD3k4,70974
17
17
  docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
18
18
  docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- docling/cli/main.py,sha256=SdavhL0VTApK9JrKz0Pc1IYdnQhK-0OOaGT8zlTiN5c,15022
19
+ docling/cli/main.py,sha256=NR7NEt8Sf3FE9D7sHpEmABM9mFMTMO5w0VPwYIIvVsk,15481
20
20
  docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  docling/datamodel/base_models.py,sha256=50Jf5zk9c4-zmnOzZLoPBnHQhTX0_OFQzIkKgnKK1o4,6229
22
- docling/datamodel/document.py,sha256=rnNw2tGuCZ1BDoBptlNpPllQ2osJMQHMvcbyrQZuSL4,12948
22
+ docling/datamodel/document.py,sha256=OHM6bm0a-62xnAZ8DFlMHzATmbgNcfMxQoQO2udaW5Q,13071
23
23
  docling/datamodel/pipeline_options.py,sha256=u37Q12FVfu1UTEhgBiZ2KslyBtG3z3Eobqvaqd_MYaA,7735
24
24
  docling/datamodel/settings.py,sha256=Sw0rN_f8rdLV1eNvVeKiyET2Oe6oz9jtW3lJzniW9Do,1302
25
- docling/document_converter.py,sha256=PoRcL2IzGoT7ZppGk6laPmKiHOwrXl1-dLMNWumNogg,12298
25
+ docling/document_converter.py,sha256=_pk0sHuPXJ14NEutatf5bK2VyNiU5cvYsVbh1HIgrIw,12431
26
26
  docling/exceptions.py,sha256=-FoP46rFJgz_jn5uDv2V052udEEg8gckk6uhoItchXc,85
27
27
  docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
28
  docling/models/base_model.py,sha256=Yq_-FmUhqhE20vXYG3WiQXDRTIPjik1CyuEZ8iYTGAY,701
29
- docling/models/base_ocr_model.py,sha256=rGSpBF4dByITcsBaRIgvFKpiu0CrhmZS_PHIo686Dw0,6428
29
+ docling/models/base_ocr_model.py,sha256=qILpSHaqczAd1eUQzuoLxN-TYz3zozmN0K5_7kCWkrM,6738
30
30
  docling/models/ds_glm_model.py,sha256=CkhsP0cEWwm4wb1g3cLFriVGpVtELiUK3REDMkPwAMw,13028
31
31
  docling/models/easyocr_model.py,sha256=Kakb20ioBxDmNsIqoGvSSs_vbqAWN3QQNHYtEi-eErg,4990
32
- docling/models/layout_model.py,sha256=skfFdWh_NgijR4bIqyUH8zlda5mMOIIdN3yMttdmsN8,9871
32
+ docling/models/layout_model.py,sha256=Xo8sclRTOO_V8Cr4RwuxB67vSWKF0LZ5nJRYU1WI--k,9063
33
33
  docling/models/ocr_mac_model.py,sha256=bLP14UUmZcSzjDe-HLj-mtksTuBmsCTg2C1wCxUpan0,4502
34
34
  docling/models/page_assemble_model.py,sha256=qdEX0AIb76ZOqJV6O9j-7r67WmuIkUlwbb2PsL7eFK4,7608
35
35
  docling/models/page_preprocessing_model.py,sha256=1gVrZjObKxAvXkkKvXnIFApPOggzgiTFPtt1CGbMbSs,2763
36
36
  docling/models/rapid_ocr_model.py,sha256=LOIvczJs3_db2o8mtrKk-pIXgC-xqWqRLu2cjA3wvy4,4980
37
- docling/models/table_structure_model.py,sha256=3bUBeP26WwDNCb5_aAlRwVZe4xUYgnwsSHgWQYZxk9E,8892
37
+ docling/models/table_structure_model.py,sha256=fUpCHthO4Uk3BhA99a85BHBm51fmdE9kfqhAk3WjuBw,9392
38
38
  docling/models/tesseract_ocr_cli_model.py,sha256=aKQBaty4cYu6zG_C5uy6Zm3eeRQo5fxIierbKixa2kc,6622
39
39
  docling/models/tesseract_ocr_model.py,sha256=RDf6iV1q-oXaGfZXv0bW6SqjHNKQvBUDlUsOkuz0neY,6095
40
40
  docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -49,8 +49,8 @@ docling/utils/glm_utils.py,sha256=IB19wToGath97gD3jAA3G_rQSptnZKhQCWLvPUCnkww,11
49
49
  docling/utils/layout_postprocessor.py,sha256=urRzeF9PrKiMBvA6DdHHwyLxG06CMhelgJeV5B1l6l0,24258
50
50
  docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
51
51
  docling/utils/utils.py,sha256=llhXSbIDNZ1MHOwBEfLHBAoJIAYI7QlPIonlI1jLUJ0,1208
52
- docling-2.14.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
53
- docling-2.14.0.dist-info/METADATA,sha256=FmM_aRgxeqVSKDOYc-8MEKH1ec_Z7x8cgMQoMVeaKDw,7732
54
- docling-2.14.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
55
- docling-2.14.0.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
56
- docling-2.14.0.dist-info/RECORD,,
52
+ docling-2.15.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
53
+ docling-2.15.0.dist-info/METADATA,sha256=VglEfKqffhUESHax5WQgtOT_Fysyea5HLDFtf7yUpdM,7732
54
+ docling-2.15.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
55
+ docling-2.15.0.dist-info/entry_points.txt,sha256=VOSzV77znM52dz5ysaDuJ0ijl1cnfrh1ZPg8od5OcTs,48
56
+ docling-2.15.0.dist-info/RECORD,,