deepdoctection 0.42.0__py3-none-any.whl → 0.43__py3-none-any.whl



Files changed (124)
  1. deepdoctection/__init__.py +2 -1
  2. deepdoctection/analyzer/__init__.py +2 -1
  3. deepdoctection/analyzer/config.py +904 -0
  4. deepdoctection/analyzer/dd.py +36 -62
  5. deepdoctection/analyzer/factory.py +311 -141
  6. deepdoctection/configs/conf_dd_one.yaml +100 -44
  7. deepdoctection/configs/profiles.jsonl +32 -0
  8. deepdoctection/dataflow/__init__.py +9 -6
  9. deepdoctection/dataflow/base.py +33 -15
  10. deepdoctection/dataflow/common.py +96 -75
  11. deepdoctection/dataflow/custom.py +36 -29
  12. deepdoctection/dataflow/custom_serialize.py +135 -91
  13. deepdoctection/dataflow/parallel_map.py +33 -31
  14. deepdoctection/dataflow/serialize.py +15 -10
  15. deepdoctection/dataflow/stats.py +41 -28
  16. deepdoctection/datapoint/__init__.py +4 -6
  17. deepdoctection/datapoint/annotation.py +104 -66
  18. deepdoctection/datapoint/box.py +190 -130
  19. deepdoctection/datapoint/convert.py +66 -39
  20. deepdoctection/datapoint/image.py +151 -95
  21. deepdoctection/datapoint/view.py +383 -236
  22. deepdoctection/datasets/__init__.py +2 -6
  23. deepdoctection/datasets/adapter.py +11 -11
  24. deepdoctection/datasets/base.py +118 -81
  25. deepdoctection/datasets/dataflow_builder.py +18 -12
  26. deepdoctection/datasets/info.py +76 -57
  27. deepdoctection/datasets/instances/__init__.py +6 -2
  28. deepdoctection/datasets/instances/doclaynet.py +17 -14
  29. deepdoctection/datasets/instances/fintabnet.py +16 -22
  30. deepdoctection/datasets/instances/funsd.py +11 -6
  31. deepdoctection/datasets/instances/iiitar13k.py +9 -9
  32. deepdoctection/datasets/instances/layouttest.py +9 -9
  33. deepdoctection/datasets/instances/publaynet.py +9 -9
  34. deepdoctection/datasets/instances/pubtables1m.py +13 -13
  35. deepdoctection/datasets/instances/pubtabnet.py +13 -15
  36. deepdoctection/datasets/instances/rvlcdip.py +8 -8
  37. deepdoctection/datasets/instances/xfund.py +11 -9
  38. deepdoctection/datasets/registry.py +18 -11
  39. deepdoctection/datasets/save.py +12 -11
  40. deepdoctection/eval/__init__.py +3 -2
  41. deepdoctection/eval/accmetric.py +72 -52
  42. deepdoctection/eval/base.py +29 -10
  43. deepdoctection/eval/cocometric.py +14 -12
  44. deepdoctection/eval/eval.py +56 -41
  45. deepdoctection/eval/registry.py +6 -3
  46. deepdoctection/eval/tedsmetric.py +24 -9
  47. deepdoctection/eval/tp_eval_callback.py +13 -12
  48. deepdoctection/extern/__init__.py +1 -1
  49. deepdoctection/extern/base.py +176 -97
  50. deepdoctection/extern/d2detect.py +127 -92
  51. deepdoctection/extern/deskew.py +19 -10
  52. deepdoctection/extern/doctrocr.py +157 -106
  53. deepdoctection/extern/fastlang.py +25 -17
  54. deepdoctection/extern/hfdetr.py +137 -60
  55. deepdoctection/extern/hflayoutlm.py +329 -248
  56. deepdoctection/extern/hflm.py +67 -33
  57. deepdoctection/extern/model.py +108 -762
  58. deepdoctection/extern/pdftext.py +37 -12
  59. deepdoctection/extern/pt/nms.py +15 -1
  60. deepdoctection/extern/pt/ptutils.py +13 -9
  61. deepdoctection/extern/tessocr.py +87 -54
  62. deepdoctection/extern/texocr.py +29 -14
  63. deepdoctection/extern/tp/tfutils.py +36 -8
  64. deepdoctection/extern/tp/tpcompat.py +54 -16
  65. deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
  66. deepdoctection/extern/tpdetect.py +4 -2
  67. deepdoctection/mapper/__init__.py +1 -1
  68. deepdoctection/mapper/cats.py +117 -76
  69. deepdoctection/mapper/cocostruct.py +35 -17
  70. deepdoctection/mapper/d2struct.py +56 -29
  71. deepdoctection/mapper/hfstruct.py +32 -19
  72. deepdoctection/mapper/laylmstruct.py +221 -185
  73. deepdoctection/mapper/maputils.py +71 -35
  74. deepdoctection/mapper/match.py +76 -62
  75. deepdoctection/mapper/misc.py +68 -44
  76. deepdoctection/mapper/pascalstruct.py +13 -12
  77. deepdoctection/mapper/prodigystruct.py +33 -19
  78. deepdoctection/mapper/pubstruct.py +42 -32
  79. deepdoctection/mapper/tpstruct.py +39 -19
  80. deepdoctection/mapper/xfundstruct.py +20 -13
  81. deepdoctection/pipe/__init__.py +1 -2
  82. deepdoctection/pipe/anngen.py +104 -62
  83. deepdoctection/pipe/base.py +226 -107
  84. deepdoctection/pipe/common.py +206 -123
  85. deepdoctection/pipe/concurrency.py +74 -47
  86. deepdoctection/pipe/doctectionpipe.py +108 -47
  87. deepdoctection/pipe/language.py +41 -24
  88. deepdoctection/pipe/layout.py +45 -18
  89. deepdoctection/pipe/lm.py +146 -78
  90. deepdoctection/pipe/order.py +196 -113
  91. deepdoctection/pipe/refine.py +111 -63
  92. deepdoctection/pipe/registry.py +1 -1
  93. deepdoctection/pipe/segment.py +213 -142
  94. deepdoctection/pipe/sub_layout.py +76 -46
  95. deepdoctection/pipe/text.py +52 -33
  96. deepdoctection/pipe/transform.py +8 -6
  97. deepdoctection/train/d2_frcnn_train.py +87 -69
  98. deepdoctection/train/hf_detr_train.py +72 -40
  99. deepdoctection/train/hf_layoutlm_train.py +85 -46
  100. deepdoctection/train/tp_frcnn_train.py +56 -28
  101. deepdoctection/utils/concurrency.py +59 -16
  102. deepdoctection/utils/context.py +40 -19
  103. deepdoctection/utils/develop.py +25 -17
  104. deepdoctection/utils/env_info.py +85 -36
  105. deepdoctection/utils/error.py +16 -10
  106. deepdoctection/utils/file_utils.py +246 -62
  107. deepdoctection/utils/fs.py +162 -43
  108. deepdoctection/utils/identifier.py +29 -16
  109. deepdoctection/utils/logger.py +49 -32
  110. deepdoctection/utils/metacfg.py +83 -21
  111. deepdoctection/utils/pdf_utils.py +119 -62
  112. deepdoctection/utils/settings.py +24 -10
  113. deepdoctection/utils/tqdm.py +10 -5
  114. deepdoctection/utils/transform.py +182 -46
  115. deepdoctection/utils/utils.py +61 -28
  116. deepdoctection/utils/viz.py +150 -104
  117. deepdoctection-0.43.dist-info/METADATA +376 -0
  118. deepdoctection-0.43.dist-info/RECORD +149 -0
  119. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/WHEEL +1 -1
  120. deepdoctection/analyzer/_config.py +0 -146
  121. deepdoctection-0.42.0.dist-info/METADATA +0 -431
  122. deepdoctection-0.42.0.dist-info/RECORD +0 -148
  123. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
  124. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
deepdoctection/dataflow/common.py

@@ -6,10 +6,9 @@
 
 
 """
-Some DataFlow classes for transforming and processing datapoints. Many classes have been taken from
-
-<https://github.com/tensorpack/dataflow/blob/master/dataflow/dataflow/common.py>
+Some DataFlows for transforming and processing datapoints
 """
+
 import itertools
 from copy import copy
 from typing import Any, Callable, Iterator, Union
@@ -25,9 +24,10 @@ class TestDataSpeed(ProxyDataFlow):
 
     def __init__(self, df: DataFlow, size: int = 5000, warmup: int = 0) -> None:
         """
-        :param df: the DataFlow to test.
-        :param size: number of datapoints to fetch.
-        :param warmup: warmup iterations
+        Args:
+            df: The DataFlow to test.
+            size: Number of datapoints to fetch.
+            warmup: Warmup iterations.
         """
         super().__init__(df)
         self.test_size = int(size)
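
For reference, a minimal benchmarking sketch against the signature shown above; it assumes that `TestDataSpeed` and `DataFromList` are exported from `deepdoctection.dataflow` and that the tensorpack-style `start()` method is available:

```python
from deepdoctection.dataflow import DataFromList, TestDataSpeed

# Time iteration over 5000 datapoints after 100 warmup iterations.
df = DataFromList(list(range(10_000)), shuffle=False)
TestDataSpeed(df, size=5000, warmup=100).start()  # prints a timing summary
```
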
@@ -63,16 +63,16 @@ class TestDataSpeed(ProxyDataFlow):
 
 class FlattenData(ProxyDataFlow):
     """
-    Flatten an iterator within a datapoint. Will flatten the datapoint if it is a list or a tuple.
-
-    **Example:**
-
-        dp_1 = ['a','b']
-        dp_2 = ['c','d']
+    FlattenData flattens an iterator within a datapoint. Will flatten the datapoint if it is a list or a tuple.
 
-    will yield
+    Example:
+        ```python
+        dp_1 = ['a','b']
+        dp_2 = ['c','d']
 
-        ['a'], ['b'], ['c'], ['d'].
+        yields:
+        ['a'], ['b'], ['c'], ['d']
+        ```
     """
 
     def __iter__(self) -> Any:
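
A runnable version of the new docstring example (assuming `DataFromList` and `FlattenData` are exported from `deepdoctection.dataflow`):

```python
from deepdoctection.dataflow import DataFromList, FlattenData

df = DataFromList([["a", "b"], ["c", "d"]], shuffle=False)
df = FlattenData(df)
df.reset_state()  # DataFlows must be reset before iteration
print(list(df))   # [['a'], ['b'], ['c'], ['d']]
```
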
@@ -84,23 +84,25 @@ class FlattenData(ProxyDataFlow):
 
 class MapData(ProxyDataFlow):
     """
-    Apply a mapper/filter on the datapoints of a DataFlow.
-    Note:
-        1. Please make sure func doesn't modify its arguments in place,
-           unless you're certain it's safe.
-        2. If you discard some datapoints, `len(MapData(ds))` will be incorrect.
+    MapData applies a mapper/filter on the datapoints of a DataFlow.
 
-    **Example:**
+    Notes:
+        1. Please ensure that `func` does not modify its arguments in-place unless it is safe.
+        2. If some datapoints are discarded, `len(MapData(ds))` will be incorrect.
 
-        df = ... # some dataflow each datapoint is [img, label]
-        ds = MapData(ds, lambda dp: [dp[0] * 255, dp[1]])
+    Example:
+        ```python
+        df = ... # a DataFlow where each datapoint is [img, label]
+        ds = MapData(ds, lambda dp: [dp[0] * 255, dp[1]])
+        ```
     """
 
     def __init__(self, df: DataFlow, func: Callable[[Any], Any]) -> None:
         """
-        :param df: input DataFlow
-        :param func: takes a datapoint and returns a new
-            datapoint. Return None to discard/skip this datapoint.
+        Args:
+            df: input DataFlow
+            func: takes a datapoint and returns a new
+                datapoint. Return None to discard/skip this datapoint.
         """
         super().__init__(df)
         self.func = func
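
A runnable sketch of the mapper/filter behavior, including the documented `None`-to-discard rule (names assume the usual `deepdoctection.dataflow` exports):

```python
from deepdoctection.dataflow import DataFromList, MapData

df = DataFromList([[1, "x"], [2, "y"], [3, "z"]], shuffle=False)
# Scale the first component; returning None discards the datapoint.
df = MapData(df, lambda dp: [dp[0] * 255, dp[1]] if dp[0] != 2 else None)
df.reset_state()
print(list(df))  # [[255, 'x'], [765, 'z']] -- the second datapoint was dropped
```
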
@@ -114,27 +116,27 @@ class MapData(ProxyDataFlow):
 
 class MapDataComponent(MapData):
     """
-    Apply a mapper/filter on a datapoint component.
-
-    Note:
-        1. This dataflow itself doesn't modify the datapoints.
-           But please make sure func doesn't modify its arguments in place,
-           unless you're certain it's safe.
-        2. If you discard some datapoints, ``len(MapDataComponent(ds, ..))`` will be incorrect.
-
-
-    **Example:**
-
-        df = ... # some dataflow each datapoint is [img, label]
-        ds = MapDataComponent(ds, lambda img: img * 255, 0) # map the 0th component
+    MapDataComponent applies a mapper/filter on a component of a datapoint.
+
+    Notes:
+        1. This DataFlow itself does not modify the datapoints. Please ensure that `func` does not modify its arguments
+           in-place unless it is safe.
+        2. If some datapoints are discarded, `len(MapDataComponent(ds, ..))` will be incorrect.
+
+    Example:
+        ```python
+        df = ... # a DataFlow where each datapoint is [img, label]
+        ds = MapDataComponent(ds, lambda img: img * 255, 0) # maps the 0th component
+        ```
     """
 
     def __init__(self, df: DataFlow, func: Callable[[Any], Any], index: Union[int, str] = 0) -> None:
         """
-        :param df: input DataFlow which produces either list or dict.
-            func (TYPE -> TYPE|None): takes ``dp[index]``, returns a new value for ``dp[index]``.
+        Args:
+            df: input DataFlow which produces either list or dict.
+            func (TYPE -> TYPE|None): takes ``dp[index]``, returns a new value for ``dp[index]``.
                 Return None to discard/skip this datapoint.
-        :param index: index or key of the component.
+            index: index or key of the component.
         """
         self._index = index
         self._func = func
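
The same idea restricted to a single component, as a runnable sketch (assuming the usual `deepdoctection.dataflow` exports):

```python
from deepdoctection.dataflow import DataFromList, MapDataComponent

df = DataFromList([[1, "x"], [2, "y"]], shuffle=False)
df = MapDataComponent(df, lambda v: v * 255, 0)  # map only the 0th component
df.reset_state()
print(list(df))  # [[255, 'x'], [510, 'y']]
```
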
@@ -152,16 +154,21 @@ class MapDataComponent(MapData):
 
 
 class RepeatedData(ProxyDataFlow):
-    """Take data points from another DataFlow and produce them until
-    it's exhausted for certain amount of times. i.e.:
-    `dp1`, `dp2`, .... `dpn`, `dp1`, `dp2`, ....`dpn`.
+    """
+    RepeatedData takes datapoints from another DataFlow and produces them until they are exhausted for a certain number
+    of repetitions.
+
+    Example:
+        ```python
+        dp1, dp2, .... dpn, dp1, dp2, ....dpn
+        ```
     """
 
     def __init__(self, df: DataFlow, num: int) -> None:
         """
-        :param df: input DataFlow
-        :param num: number of times to repeat ds.
-            Set to -1 to repeat ``ds`` infinite times.
+        Args:
+            df: Input DataFlow.
+            num: Number of repetitions of the DataFlow. Set `-1` to repeat the DataFlow infinitely.
         """
        self.num = num
        if self.num != -1:
@@ -173,7 +180,7 @@ class RepeatedData(ProxyDataFlow):
     def __len__(self) -> int:
         """
         Raises:
-            `ValueError` when num == -1.
+            ValueError: when num == -1.
         """
         if self.num == -1:
             raise NotImplementedError("__len__() is unavailable for infinite dataflow")
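
A runnable sketch of the repetition behavior (assuming the usual `deepdoctection.dataflow` exports; note that, per the hunk above, `__len__` raises for an infinite dataflow):

```python
from deepdoctection.dataflow import DataFromList, RepeatedData

df = RepeatedData(DataFromList(["a", "b"], shuffle=False), num=2)
df.reset_state()
print(list(df))  # ['a', 'b', 'a', 'b']
print(len(df))   # 4; with num=-1, __len__ raises instead
```
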
@@ -190,20 +197,23 @@ class RepeatedData(ProxyDataFlow):
 
 class ConcatData(DataFlow):
     """
-    Concatenate several DataFlow.
-    Produce datapoints from each DataFlow and start the next when one
-    DataFlow is exhausted. Use this dataflow to process several .pdf in one step.
+    ConcatData concatenates multiple DataFlows. Produces datapoints from each DataFlow and starts the next when one
+    DataFlow is exhausted. Use this DataFlow to process multiple .pdf files in one step.
+
+    Example:
+        ```python
+        df_1 = analyzer.analyze(path="path/to/pdf_1.pdf")
+        df_2 = analyzer.analyze(path="path/to/pdf_2.pdf")
+        df = ConcatData([df_1, df_2])
+        ```
 
-    **Example:**
 
-        df_1 = analyzer.analyze(path=path/to/pdf_1.pdf")
-        df_2 = analyzer.analyze(path=path/to/pdf_2.pdf")
-        df = ConcatData([df_1,df_2])
     """
 
     def __init__(self, df_lists: list[DataFlow]) -> None:
         """
-        :param df_lists: a list of DataFlow.
+        Args:
+            df_lists: A list of DataFlows.
         """
         self.df_lists = df_lists
 
@@ -221,28 +231,31 @@ class ConcatData(DataFlow):
 
 class JoinData(DataFlow):
     """
-    Join the components from each DataFlow. See below for its behavior.
-    Note that you can't join a DataFlow that produces lists with one that produces dicts.
-
-    **Example:**
+    JoinData joins the components from each DataFlow. See below for its behavior. It is not possible to join a DataFlow
+    that produces lists with one that produces dictionaries.
 
+    Example:
+        ```python
         df1 produces: [[c1], [c2]]
         df2 produces: [[c3], [c4]]
         joined: [[c1, c3], [c2, c4]]
 
-        df1 produces: {"a":c1, "b":c2}
-        df2 produces: {"c":c3}
-        joined: {"a":c1, "b":c2, "c":c3}
+        df1 produces: {"a": c1, "b": c2}
+        df2 produces: {"c": c3}
+        joined: {"a": c1, "b": c2, "c": c3}
+        ```
+
+    `JoinData` stops once the first DataFlow raises a `StopIteration`.
+
 
-    `JoinData` will stop once the first Dataflow throws a StopIteration
     """
 
     def __init__(self, df_lists: list[DataFlow]) -> None:
         """
-        :param df_lists: a list of DataFlow. When these dataflows have different sizes, JoinData will stop when any
-            of them is exhausted.
-            The list could contain the same DataFlow instance more than once,
-            but note that in that case `__iter__` will then also be called many times.
+        Args:
+            df_lists: A list of DataFlows. If these DataFlows have different sizes, `JoinData` stops when one of them is
+                exhausted. The list can contain the same DataFlow instance multiple times, but note that in this
+                case `__iter__` will also be called multiple times.
         """
         self.df_lists = df_lists
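
The docstring's notation, as a runnable sketch (assuming the usual `deepdoctection.dataflow` exports):

```python
from deepdoctection.dataflow import DataFromList, JoinData

df_1 = DataFromList([["c1"], ["c2"]], shuffle=False)
df_2 = DataFromList([["c3"], ["c4"]], shuffle=False)
df = JoinData([df_1, df_2])
df.reset_state()
print(list(df))  # [['c1', 'c3'], ['c2', 'c4']]
```
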
@@ -275,18 +288,26 @@ class JoinData(DataFlow):
 
 class BatchData(ProxyDataFlow):
     """
-    Stack datapoints into batches. It produces datapoints of the same number of components as `df`, but
+    BatchData stacks datapoints into batches. It produces datapoints with the same number of components as `df`, but
     each datapoint is now a list of datapoints.
+
+    Example:
+        ```python
+        df produces: [[c1], [c2], [c3], [c4]]
+        batch_size = 2
+        yields: [[c1, c2], [c3, c4]]
+        ```
+
     """
 
     def __init__(self, df: DataFlow, batch_size: int, remainder: bool = False) -> None:
         """
-        :param df: A dataflow
-        :param batch_size: batch size
-        :param remainder: When the remaining datapoints in ``df`` is not enough to form a batch, whether or not to
-            also produce the remaining data as a smaller batch.
-            If set to `False`, all produced datapoints are guaranteed to have the same batch size.
-            If set to `True`, `len(ds)` must be accurate.
+        Args:
+            df: A DataFlow.
+            batch_size: Batch size.
+            remainder: If the remaining datapoints in `df` are not enough to form a batch, whether to produce the
+                remaining data as a smaller batch. If set to `False`, all produced datapoints are guaranteed to
+                have the same batch size. If set to `True`, `len(ds)` must be accurate.
         """
         super().__init__(df)
         if not remainder:
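
A runnable sketch of batching with a remainder (assuming the usual `deepdoctection.dataflow` exports; `remainder=True` needs an accurate `len`, which `DataFromList` provides):

```python
from deepdoctection.dataflow import BatchData, DataFromList

df = DataFromList([["c1"], ["c2"], ["c3"], ["c4"], ["c5"]], shuffle=False)
df = BatchData(df, batch_size=2, remainder=True)
df.reset_state()
for batch in df:
    print(batch)  # two full batches of two datapoints, then the smaller remainder
```
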
deepdoctection/dataflow/custom.py

@@ -16,8 +16,7 @@
 # limitations under the License.
 
 """
-Adding some functionality to dataflow classes (e.g. monkey patching, inheritance ...). Some ideas have been taken
-from
+Some custom dataflow classes. Some ideas have been taken from
 
 <https://github.com/tensorpack/dataflow/blob/master/dataflow/dataflow/common.py>
 """
@@ -40,18 +39,22 @@ class CacheData(ProxyDataFlow):
     Completely cache the first pass of a DataFlow in memory,
     and produce from the cache thereafter.
 
-    NOTE: The user should not stop the iterator before it has reached the end.
-          Otherwise, the cache may be incomplete.
+    Note:
+        The user should not stop the iterator before it has reached the end.
+        Otherwise, the cache may be incomplete.
 
-    **Example:**
+    Example:
+        ```python
+        df_list = CacheData(df).get_cache() # Buffers the whole dataflow and return a list of all datapoints
+        ```
 
-        df_list = CacheData(df).get_cache() # buffers the whole dataflow and return a list of all datapoints
     """
 
     def __init__(self, df: DataFlow, shuffle: bool = False) -> None:
         """
-        :param df: input DataFlow.
-        :param shuffle: whether to shuffle the cache before yielding from it.
+        Args:
+            df: input DataFlow.
+            shuffle: whether to shuffle the cache before yielding from it.
         """
         self.shuffle = shuffle
         self.buffer: list[Any] = []
@@ -80,9 +83,10 @@ class CacheData(ProxyDataFlow):
 
     def get_cache(self) -> list[Any]:
         """
-        get the cache of the whole dataflow as a list
+        Get the cache of the whole dataflow as a list.
 
-        :return: list of datapoints
+        Returns:
+            list of datapoints
         """
         self.reset_state()
         with get_tqdm() as status_bar:
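
A runnable sketch of `get_cache` (assuming the usual `deepdoctection.dataflow` exports; as the hunk shows, `get_cache` calls `reset_state` itself before the pass):

```python
from deepdoctection.dataflow import CacheData, DataFromList

df = CacheData(DataFromList([["a"], ["b"]], shuffle=False))
dp_list = df.get_cache()  # one full pass, returns all datapoints as a list
print(dp_list)            # [['a'], ['b']]
```
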
@@ -95,21 +99,22 @@
 
 class CustomDataFromList(DataFromList):
     """
-    Wraps a list of datapoints to a dataflow. Compared to `Tensorpack.DataFlow.DataFromList` implementation you
-    can specify a number of datapoints after that the iteration stops. You can also pass a rebalance function that
-    filters on that list.
+    Wraps a list of datapoints to a dataflow. Compared to `Tensorpack.DataFlow.DataFromList`
+    implementation you can specify a number of datapoints after that the iteration stops.
+    You can also pass a re-balance function that filters on that list.
 
     Example:
 
-        def filter_first(lst):
-            return lst.pop(0)
+        ```python
+        def filter_first(lst):
+            return lst.pop(0)
 
-        df = CustomDataFromList(lst=[["a","b"],["c","d"]],rebalance_func=filter_first)
-        df.reset_state()
+        df = CustomDataFromList(lst=[["a","b"],["c","d"]], rebalance_func=filter_first)
+        df.reset_state()
 
     will yield:
-
         ["c","d"]
+        ```
 
     """
 
@@ -121,13 +126,14 @@ class CustomDataFromList(DataFromList):
         rebalance_func: Optional[Callable[[list[Any]], list[Any]]] = None,
     ):
         """
-        :param lst: the input list. Each element represents a datapoint.
-        :param shuffle: Whether to shuffle the list before streaming.
-        :param max_datapoints: The maximum number of datapoints to return before stopping the iteration.
-            If None it streams the whole dataflow.
-        :param rebalance_func: A func that inputs a list and outputs a list. Useful, if you want to filter the passed
-            list and re-balance the sample. Only the output list of the re-balancing function will be
-            considered.
+        Args:
+            lst: The input list. Each element represents a datapoint.
+            shuffle: Whether to shuffle the list before streaming.
+            max_datapoints: The maximum number of datapoints to return before stopping the iteration.
+                If None it streams the whole dataflow.
+            rebalance_func: A func that inputs a list and outputs a list. Useful, if you want to filter the passed
+                list and re-balance the sample. Only the output list of the re-balancing function will be
+                considered.
         """
         super().__init__(lst, shuffle)
         self.max_datapoints = max_datapoints
@@ -176,9 +182,10 @@ class CustomDataFromIterable(DataFromIterable):
 
     def __init__(self, iterable: Iterable[Any], max_datapoints: Optional[int] = None):
         """
-        :param iterable: An iterable object
-        :param max_datapoints: The maximum number of datapoints to stream. If None it iterates through the whole
-            dataflow.
+        Args:
+            iterable: An iterable object
+            max_datapoints: The maximum number of datapoints to stream. If None it iterates through the whole
+                dataflow.
         """
         super().__init__(iterable)
         self.max_datapoints = max_datapoints
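
A runnable sketch of the `max_datapoints` cut-off (assuming `CustomDataFromIterable` is exported from `deepdoctection.dataflow`):

```python
from deepdoctection.dataflow import CustomDataFromIterable

df = CustomDataFromIterable(range(100), max_datapoints=3)
df.reset_state()
print(list(df))  # [0, 1, 2] -- iteration stops after max_datapoints
```
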