macrodata-refiner 0.2.2__tar.gz → 0.3.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (216)
  1. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/PKG-INFO +57 -29
  2. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/README.md +22 -19
  3. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/pyproject.toml +40 -8
  4. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/macrodata_refiner.egg-info/PKG-INFO +57 -29
  5. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/macrodata_refiner.egg-info/SOURCES.txt +79 -16
  6. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/macrodata_refiner.egg-info/entry_points.txt +1 -0
  7. macrodata_refiner-0.3.0/src/macrodata_refiner.egg-info/requires.txt +66 -0
  8. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/__init__.py +36 -0
  9. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/cli/auth.py +13 -13
  10. macrodata_refiner-0.3.0/src/refiner/cli/commands/__init__.py +1 -0
  11. macrodata_refiner-0.2.2/src/refiner/cli/main.py → macrodata_refiner-0.3.0/src/refiner/cli/commands/auth.py +3 -21
  12. macrodata_refiner-0.3.0/src/refiner/cli/commands/jobs.py +194 -0
  13. macrodata_refiner-0.3.0/src/refiner/cli/commands/run.py +42 -0
  14. macrodata_refiner-0.3.0/src/refiner/cli/commands/secrets.py +53 -0
  15. macrodata_refiner-0.3.0/src/refiner/cli/common.py +66 -0
  16. macrodata_refiner-0.3.0/src/refiner/cli/jobs/__init__.py +1 -0
  17. macrodata_refiner-0.3.0/src/refiner/cli/jobs/attach.py +49 -0
  18. macrodata_refiner-0.3.0/src/refiner/cli/jobs/common.py +161 -0
  19. macrodata_refiner-0.3.0/src/refiner/cli/jobs/control.py +30 -0
  20. macrodata_refiner-0.3.0/src/refiner/cli/jobs/follow.py +299 -0
  21. macrodata_refiner-0.3.0/src/refiner/cli/jobs/get.py +237 -0
  22. macrodata_refiner-0.3.0/src/refiner/cli/jobs/list.py +86 -0
  23. macrodata_refiner-0.3.0/src/refiner/cli/jobs/logs.py +574 -0
  24. macrodata_refiner-0.3.0/src/refiner/cli/jobs/manifest.py +158 -0
  25. macrodata_refiner-0.3.0/src/refiner/cli/jobs/metrics.py +346 -0
  26. macrodata_refiner-0.3.0/src/refiner/cli/jobs/workers.py +87 -0
  27. macrodata_refiner-0.3.0/src/refiner/cli/main.py +34 -0
  28. macrodata_refiner-0.3.0/src/refiner/cli/run/__init__.py +1 -0
  29. macrodata_refiner-0.3.0/src/refiner/cli/run/cloud.py +575 -0
  30. macrodata_refiner-0.3.0/src/refiner/cli/run/command.py +92 -0
  31. macrodata_refiner-0.3.0/src/refiner/cli/run/local.py +343 -0
  32. macrodata_refiner-0.3.0/src/refiner/cli/run/modes.py +69 -0
  33. macrodata_refiner-0.3.0/src/refiner/cli/secrets.py +105 -0
  34. macrodata_refiner-0.3.0/src/refiner/cli/ui/__init__.py +15 -0
  35. macrodata_refiner-0.3.0/src/refiner/cli/ui/console.py +943 -0
  36. macrodata_refiner-0.2.2/src/refiner/cli/ui.py → macrodata_refiner-0.3.0/src/refiner/cli/ui/terminal.py +7 -0
  37. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/execution/asyncio/runtime.py +1 -3
  38. macrodata_refiner-0.3.0/src/refiner/execution/asyncio/window.py +119 -0
  39. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/execution/engine.py +158 -14
  40. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/execution/operators/row.py +2 -2
  41. macrodata_refiner-0.3.0/src/refiner/execution/operators/vectorized.py +234 -0
  42. macrodata_refiner-0.3.0/src/refiner/inference/__init__.py +82 -0
  43. macrodata_refiner-0.3.0/src/refiner/inference/capabilities.py +246 -0
  44. macrodata_refiner-0.3.0/src/refiner/inference/generate_pooling.py +49 -0
  45. macrodata_refiner-0.3.0/src/refiner/inference/generate_text.py +241 -0
  46. macrodata_refiner-0.3.0/src/refiner/inference/internal/__init__.py +1 -0
  47. macrodata_refiner-0.3.0/src/refiner/inference/internal/media.py +133 -0
  48. macrodata_refiner-0.3.0/src/refiner/inference/internal/message_conversion.py +45 -0
  49. macrodata_refiner-0.3.0/src/refiner/inference/internal/response.py +70 -0
  50. macrodata_refiner-0.3.0/src/refiner/inference/internal/runtime.py +177 -0
  51. macrodata_refiner-0.3.0/src/refiner/inference/internal/schema.py +71 -0
  52. macrodata_refiner-0.3.0/src/refiner/inference/internal/transport.py +380 -0
  53. macrodata_refiner-0.3.0/src/refiner/inference/internal/usage.py +31 -0
  54. macrodata_refiner-0.3.0/src/refiner/inference/providers/__init__.py +15 -0
  55. macrodata_refiner-0.3.0/src/refiner/inference/providers/anthropic.py +694 -0
  56. macrodata_refiner-0.3.0/src/refiner/inference/providers/base.py +138 -0
  57. macrodata_refiner-0.3.0/src/refiner/inference/providers/google.py +787 -0
  58. macrodata_refiner-0.3.0/src/refiner/inference/providers/openai.py +1242 -0
  59. macrodata_refiner-0.3.0/src/refiner/inference/providers/warnings.py +55 -0
  60. macrodata_refiner-0.3.0/src/refiner/inference/types.py +342 -0
  61. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/io/datafile.py +67 -1
  62. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/io/datafolder.py +10 -6
  63. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/io/fileset.py +86 -17
  64. macrodata_refiner-0.3.0/src/refiner/job_urls.py +16 -0
  65. macrodata_refiner-0.3.0/src/refiner/launchers/base.py +104 -0
  66. macrodata_refiner-0.3.0/src/refiner/launchers/cloud.py +372 -0
  67. macrodata_refiner-0.3.0/src/refiner/launchers/local.py +516 -0
  68. macrodata_refiner-0.3.0/src/refiner/launchers/secrets.py +153 -0
  69. macrodata_refiner-0.3.0/src/refiner/pipeline/__init__.py +55 -0
  70. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/data/block.py +9 -3
  71. macrodata_refiner-0.3.0/src/refiner/pipeline/data/datatype.py +409 -0
  72. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/data/shard.py +10 -2
  73. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/data/tabular.py +209 -43
  74. macrodata_refiner-0.3.0/src/refiner/pipeline/pipeline.py +1274 -0
  75. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/planning.py +77 -45
  76. macrodata_refiner-0.3.0/src/refiner/pipeline/resources.py +48 -0
  77. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sinks/__init__.py +4 -1
  78. macrodata_refiner-0.3.0/src/refiner/pipeline/sinks/assets.py +430 -0
  79. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sinks/base.py +37 -7
  80. macrodata_refiner-0.3.0/src/refiner/pipeline/sinks/jsonl.py +147 -0
  81. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sinks/lerobot.py +165 -45
  82. macrodata_refiner-0.3.0/src/refiner/pipeline/sinks/parquet.py +146 -0
  83. macrodata_refiner-0.3.0/src/refiner/pipeline/sinks/reducer/__init__.py +9 -0
  84. macrodata_refiner-0.3.0/src/refiner/pipeline/sinks/reducer/file.py +180 -0
  85. macrodata_refiner-0.2.2/src/refiner/pipeline/sinks/lerobot_reducer.py → macrodata_refiner-0.3.0/src/refiner/pipeline/sinks/reducer/lerobot.py +29 -19
  86. macrodata_refiner-0.3.0/src/refiner/pipeline/sinks/reducer/zarr.py +281 -0
  87. macrodata_refiner-0.3.0/src/refiner/pipeline/sinks/zarr.py +602 -0
  88. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sources/__init__.py +16 -2
  89. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sources/base.py +4 -0
  90. macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/__init__.py +29 -0
  91. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sources/readers/base.py +29 -1
  92. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sources/readers/csv.py +23 -5
  93. macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/files.py +166 -0
  94. macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/hdf5.py +280 -0
  95. macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/hf_dataset.py +416 -0
  96. macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/json.py +167 -0
  97. macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/mcap.py +967 -0
  98. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sources/readers/parquet.py +55 -11
  99. macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/tfds.py +392 -0
  100. macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/tfrecord.py +205 -0
  101. macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/utils.py +237 -0
  102. macrodata_refiner-0.3.0/src/refiner/pipeline/sources/readers/zarr.py +577 -0
  103. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/steps.py +6 -1
  104. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/utils/cache/decoder_cache.py +15 -11
  105. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/platform/auth.py +14 -4
  106. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/platform/client/__init__.py +20 -13
  107. macrodata_refiner-0.3.0/src/refiner/platform/client/api.py +577 -0
  108. macrodata_refiner-0.3.0/src/refiner/platform/client/models.py +319 -0
  109. macrodata_refiner-0.3.0/src/refiner/platform/client/serialize.py +39 -0
  110. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/platform/manifest.py +59 -2
  111. macrodata_refiner-0.3.0/src/refiner/robotics/__init__.py +47 -0
  112. macrodata_refiner-0.3.0/src/refiner/robotics/egocentric.py +99 -0
  113. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/robotics/lerobot_format/__init__.py +0 -2
  114. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/robotics/lerobot_format/row.py +135 -72
  115. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/robotics/lerobot_format/tabular.py +37 -8
  116. macrodata_refiner-0.3.0/src/refiner/robotics/motion.py +181 -0
  117. macrodata_refiner-0.3.0/src/refiner/robotics/reward.py +268 -0
  118. macrodata_refiner-0.3.0/src/refiner/robotics/row.py +867 -0
  119. macrodata_refiner-0.3.0/src/refiner/robotics/subtask_annotation.py +466 -0
  120. macrodata_refiner-0.3.0/src/refiner/robotics/synchronization.py +244 -0
  121. macrodata_refiner-0.3.0/src/refiner/robotics/tabular.py +172 -0
  122. macrodata_refiner-0.3.0/src/refiner/services/__init__.py +14 -0
  123. macrodata_refiner-0.3.0/src/refiner/services/base.py +44 -0
  124. macrodata_refiner-0.3.0/src/refiner/services/discovery.py +102 -0
  125. macrodata_refiner-0.3.0/src/refiner/services/manager.py +251 -0
  126. macrodata_refiner-0.3.0/src/refiner/services/vllm.py +78 -0
  127. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/text/commoncrawl.py +9 -2
  128. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/video/__init__.py +27 -1
  129. macrodata_refiner-0.3.0/src/refiner/video/decode.py +279 -0
  130. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/video/remux.py +68 -15
  131. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/video/transcode.py +57 -44
  132. macrodata_refiner-0.3.0/src/refiner/video/types.py +520 -0
  133. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/video/writer.py +80 -31
  134. macrodata_refiner-0.3.0/src/refiner/worker/context.py +177 -0
  135. macrodata_refiner-0.3.0/src/refiner/worker/entrypoint.py +99 -0
  136. macrodata_refiner-0.3.0/src/refiner/worker/lifecycle.py +142 -0
  137. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/worker/metrics/api.py +4 -2
  138. macrodata_refiner-0.3.0/src/refiner/worker/metrics/emitter.py +112 -0
  139. macrodata_refiner-0.3.0/src/refiner/worker/resources/cpu.py +24 -0
  140. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/worker/resources/gpu.py +7 -7
  141. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/worker/runner.py +115 -163
  142. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/worker/workdir.py +2 -2
  143. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/tests/test_cache.py +2 -3
  144. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/tests/test_commoncrawl_text.py +56 -5
  145. macrodata_refiner-0.3.0/tests/test_video_decode.py +255 -0
  146. macrodata_refiner-0.2.2/src/macrodata_refiner.egg-info/requires.txt +0 -35
  147. macrodata_refiner-0.2.2/src/refiner/execution/asyncio/window.py +0 -91
  148. macrodata_refiner-0.2.2/src/refiner/execution/operators/vectorized.py +0 -143
  149. macrodata_refiner-0.2.2/src/refiner/launchers/base.py +0 -215
  150. macrodata_refiner-0.2.2/src/refiner/launchers/cloud.py +0 -210
  151. macrodata_refiner-0.2.2/src/refiner/launchers/local.py +0 -336
  152. macrodata_refiner-0.2.2/src/refiner/pipeline/__init__.py +0 -25
  153. macrodata_refiner-0.2.2/src/refiner/pipeline/pipeline.py +0 -603
  154. macrodata_refiner-0.2.2/src/refiner/pipeline/sinks/jsonl.py +0 -81
  155. macrodata_refiner-0.2.2/src/refiner/pipeline/sinks/parquet.py +0 -78
  156. macrodata_refiner-0.2.2/src/refiner/pipeline/sources/readers/__init__.py +0 -15
  157. macrodata_refiner-0.2.2/src/refiner/pipeline/sources/readers/jsonl.py +0 -97
  158. macrodata_refiner-0.2.2/src/refiner/pipeline/sources/readers/utils.py +0 -104
  159. macrodata_refiner-0.2.2/src/refiner/platform/client/api.py +0 -263
  160. macrodata_refiner-0.2.2/src/refiner/platform/client/http.py +0 -118
  161. macrodata_refiner-0.2.2/src/refiner/platform/client/models.py +0 -197
  162. macrodata_refiner-0.2.2/src/refiner/platform/client/serialize.py +0 -34
  163. macrodata_refiner-0.2.2/src/refiner/robotics/__init__.py +0 -25
  164. macrodata_refiner-0.2.2/src/refiner/robotics/motion.py +0 -165
  165. macrodata_refiner-0.2.2/src/refiner/video/types.py +0 -23
  166. macrodata_refiner-0.2.2/src/refiner/worker/context.py +0 -121
  167. macrodata_refiner-0.2.2/src/refiner/worker/entrypoint.py +0 -113
  168. macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/__init__.py +0 -5
  169. macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/base.py +0 -25
  170. macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/local/__init__.py +0 -3
  171. macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/local/claim.py +0 -147
  172. macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/local/files.py +0 -41
  173. macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/local/lifecycle.py +0 -308
  174. macrodata_refiner-0.2.2/src/refiner/worker/lifecycle/platform.py +0 -99
  175. macrodata_refiner-0.2.2/src/refiner/worker/metrics/context.py +0 -147
  176. macrodata_refiner-0.2.2/src/refiner/worker/metrics/otel.py +0 -364
  177. macrodata_refiner-0.2.2/src/refiner/worker/resources/cpu.py +0 -123
  178. macrodata_refiner-0.2.2/src/refiner/worker/resources/memory.py +0 -63
  179. macrodata_refiner-0.2.2/src/refiner/worker/resources/network.py +0 -27
  180. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/LICENSE +0 -0
  181. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/setup.cfg +0 -0
  182. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/macrodata_refiner.egg-info/dependency_links.txt +0 -0
  183. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/macrodata_refiner.egg-info/top_level.txt +0 -0
  184. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/cli/__init__.py +0 -0
  185. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/execution/__init__.py +0 -0
  186. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/execution/asyncio/__init__.py +0 -0
  187. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/execution/buffer.py +0 -0
  188. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/execution/operators/__init__.py +0 -0
  189. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/execution/tracking/__init__.py +0 -0
  190. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/execution/tracking/shards.py +0 -0
  191. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/io/__init__.py +0 -0
  192. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/launchers/__init__.py +0 -0
  193. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/data/row.py +0 -0
  194. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/expressions.py +0 -0
  195. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sources/items.py +0 -0
  196. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sources/readers/lerobot.py +0 -0
  197. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/sources/task.py +0 -0
  198. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/utils/__init__.py +0 -0
  199. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/utils/cache/__init__.py +0 -0
  200. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/utils/cache/file_cache.py +0 -0
  201. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/pipeline/utils/cache/lease_cache.py +0 -0
  202. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/platform/__init__.py +0 -0
  203. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/py.typed +0 -0
  204. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/robotics/lerobot_format/metadata/__init__.py +0 -0
  205. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/robotics/lerobot_format/metadata/info.py +0 -0
  206. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/robotics/lerobot_format/metadata/metadata.py +0 -0
  207. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/robotics/lerobot_format/metadata/stats.py +0 -0
  208. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/robotics/lerobot_format/metadata/tasks.py +0 -0
  209. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/text/__init__.py +0 -0
  210. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/utils/__init__.py +0 -0
  211. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/utils/imports.py +0 -0
  212. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/worker/__init__.py +0 -0
  213. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/worker/metrics/__init__.py +0 -0
  214. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/src/refiner/worker/resources/__init__.py +0 -0
  215. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/tests/test_expressions.py +0 -0
  216. {macrodata_refiner-0.2.2 → macrodata_refiner-0.3.0}/tests/test_optional_dependencies.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: macrodata-refiner
3
- Version: 0.2.2
3
+ Version: 0.3.0
4
4
  Summary: Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets
5
5
  Author: Macrodata Labs
6
6
  License-Expression: Apache-2.0
@@ -13,30 +13,55 @@ Requires-Python: >=3.10
13
13
  Description-Content-Type: text/markdown
14
14
  License-File: LICENSE
15
15
  Requires-Dist: cloudpickle==3.1.2
16
- Requires-Dist: fsspec
16
+ Requires-Dist: fsspec[http]
17
17
  Requires-Dist: httpx
18
18
  Requires-Dist: loguru
19
- Requires-Dist: opentelemetry-exporter-otlp-proto-http
20
- Requires-Dist: opentelemetry-sdk
21
19
  Requires-Dist: numpy
22
- Requires-Dist: psutil
23
20
  Requires-Dist: orjson
21
+ Requires-Dist: packaging
24
22
  Requires-Dist: pyarrow
25
23
  Requires-Dist: msgspec>=0.20.0
24
+ Requires-Dist: pydantic>=2.0.0
26
25
  Provides-Extra: video
27
26
  Requires-Dist: av; extra == "video"
28
- Provides-Extra: robotics
29
- Requires-Dist: macrodata-refiner[video]; extra == "robotics"
30
- Requires-Dist: huggingface-hub>=1.4.1; extra == "robotics"
31
- Requires-Dist: hf>=1.7.1; extra == "robotics"
27
+ Requires-Dist: pillow; extra == "video"
28
+ Provides-Extra: hf
29
+ Requires-Dist: datasets>=3.0.0; extra == "hf"
30
+ Requires-Dist: huggingface-hub>=1.4.1; extra == "hf"
31
+ Requires-Dist: hf>=1.7.1; extra == "hf"
32
+ Provides-Extra: egocentric
33
+ Requires-Dist: macrodata-refiner[hf]; extra == "egocentric"
34
+ Requires-Dist: macrodata-refiner[video]; extra == "egocentric"
35
+ Requires-Dist: ego-vision[models]>=0.1.8; extra == "egocentric"
32
36
  Provides-Extra: text
33
37
  Requires-Dist: warcio; extra == "text"
38
+ Provides-Extra: hdf5
39
+ Requires-Dist: h5py; extra == "hdf5"
40
+ Provides-Extra: zarr
41
+ Requires-Dist: zarr<3,>=2.18; extra == "zarr"
42
+ Requires-Dist: numcodecs<0.16; extra == "zarr"
43
+ Provides-Extra: mcap
44
+ Requires-Dist: av; extra == "mcap"
45
+ Requires-Dist: mcap; extra == "mcap"
46
+ Requires-Dist: mcap-protobuf-support; extra == "mcap"
47
+ Requires-Dist: mcap-ros2-support; extra == "mcap"
48
+ Requires-Dist: pillow; extra == "mcap"
34
49
  Provides-Extra: s3
35
50
  Requires-Dist: s3fs; extra == "s3"
51
+ Provides-Extra: tensorflow
52
+ Requires-Dist: tensorflow; extra == "tensorflow"
53
+ Provides-Extra: tfds
54
+ Requires-Dist: macrodata-refiner[tensorflow]; extra == "tfds"
55
+ Requires-Dist: tensorflow-datasets; extra == "tfds"
36
56
  Provides-Extra: testing
37
- Requires-Dist: macrodata-refiner[robotics]; extra == "testing"
57
+ Requires-Dist: macrodata-refiner[hdf5]; extra == "testing"
58
+ Requires-Dist: macrodata-refiner[hf]; extra == "testing"
59
+ Requires-Dist: macrodata-refiner[mcap]; extra == "testing"
60
+ Requires-Dist: macrodata-refiner[video]; extra == "testing"
61
+ Requires-Dist: macrodata-refiner[zarr]; extra == "testing"
38
62
  Requires-Dist: macrodata-refiner[text]; extra == "testing"
39
63
  Requires-Dist: macrodata-refiner[s3]; extra == "testing"
64
+ Requires-Dist: macrodata-refiner[tfds]; extra == "testing"
40
65
  Requires-Dist: pytest>=8.0.0; extra == "testing"
41
66
  Requires-Dist: pytest-cov>=5.0.0; extra == "testing"
42
67
  Provides-Extra: all
@@ -49,9 +74,10 @@ Dynamic: license-file
49
74
 
50
75
  <h1 align="center">Macrodata Refiner</h1>
51
76
 
52
- Refiner is an open-source engine for turning raw, unstructured, and multimodal data into **high-quality datasets** for large model training.
77
+ Refiner is an open-source engine for turning raw robotics and multimodal data into **high-quality datasets** for model training.
53
78
 
54
- It replaces the brittle scripts and stitched-together data tooling that teams still use for training data work, while offering much better support for multimodal data, robotics workflows, and model-based processing.
79
+ It gives training-data teams one pipeline model for multimodal data, robotics
80
+ workflows, and model-based processing.
55
81
 
56
82
  It also plugs into the Macrodata platform, which gives you visibility into what is happening to your data while pipelines run: job and shard lifecycle, logs, metrics, manifests, and pipeline behavior. The same code can run locally for development and then scale out through Macrodata's elastic serverless cloud.
57
83
 
@@ -90,7 +116,7 @@ import refiner as mdr
90
116
  pad_frames=5,
91
117
  )
92
118
  )
93
- .write_lerobot("hf://buckets/macrodata/test_bucket/aloha_motion")
119
+ .write_lerobot("hf://buckets/acme-robotics/aloha_motion")
94
120
  .launch_cloud(
95
121
  name="motion_trim",
96
122
  num_workers=4,
@@ -98,7 +124,7 @@ import refiner as mdr
98
124
  )
99
125
  ```
100
126
 
101
- Need cloud GPUs? See [Launchers](docs/launchers.md) for the GPU-specific cloud options.
127
+ Need cloud GPUs? See [Resources, GPUs, and Services](docs/running-pipelines/resources-gpus-and-services.md).
102
128
 
103
129
  ### Local example
104
130
 
@@ -137,31 +163,33 @@ def add_preview(row):
137
163
 
138
164
  - training-data-first pipeline primitives instead of generic ETL abstractions
139
165
  - multimodal processing, with robotics support today
140
- - a lot of built-in readers, transforms, sinks, and lifecycle/runtime machinery so you do not have to rebuild the same scaffolding in scripts
166
+ - built-in readers, transforms, sinks, and runtime machinery for common dataset work
141
167
  - access to any storage backend supported by `fsspec` (S3, GCP, Hugging Face, etc.)
142
168
  - local execution for development and elastic cloud execution for large runs
143
- - built-in observability through the Macrodata platform, so you can inspect how your data is changing instead of debugging blindly after the fact
169
+ - built-in observability through the Macrodata platform for job state, logs, metrics, and manifests
144
170
 
145
171
  ## Docs
146
172
 
147
- Getting started:
173
+ Start here:
148
174
 
149
- - [Pipeline basics](docs/pipeline-basics.md)
150
- - [Launchers](docs/launchers.md)
151
- - [CLI](docs/cli.md)
175
+ - [Docs index](docs/index.md)
176
+ - [Quickstart](docs/quickstart.md)
177
+ - [Running pipelines](docs/running-pipelines/index.md)
152
178
 
153
- Core concepts:
179
+ Build a dataset:
154
180
 
155
- - [Reading and writing data](docs/reading-and-writing.md)
156
- - [Transforms](docs/transforms.md)
157
- - [Expressions](docs/expressions.md)
158
- - [In-process debugging](docs/in-process-debugging.md)
159
- - [Task pipelines](docs/task-pipelines.md)
181
+ - [Reading data](docs/reading-data/index.md)
182
+ - [Episode data](docs/episode-data/index.md)
183
+ - [Transforms](docs/transforms/index.md)
184
+ - [Episode operations](docs/episode-operations/index.md)
185
+ - [Writing data](docs/writing-data/index.md)
186
+ - [Examples](docs/examples/index.md)
160
187
 
161
- Modalities and platform:
188
+ Operate jobs:
162
189
 
163
- - [Robotics](docs/robotics.md)
164
- - [Observability](docs/observability.md)
190
+ - [Platform](docs/platform/index.md)
191
+ - [CLI](docs/cli/index.md)
192
+ - [Reference](docs/reference/index.md)
165
193
 
166
194
  ## Community
167
195
 
@@ -4,9 +4,10 @@
4
4
 
5
5
  <h1 align="center">Macrodata Refiner</h1>
6
6
 
7
- Refiner is an open-source engine for turning raw, unstructured, and multimodal data into **high-quality datasets** for large model training.
7
+ Refiner is an open-source engine for turning raw robotics and multimodal data into **high-quality datasets** for model training.
8
8
 
9
- It replaces the brittle scripts and stitched-together data tooling that teams still use for training data work, while offering much better support for multimodal data, robotics workflows, and model-based processing.
9
+ It gives training-data teams one pipeline model for multimodal data, robotics
10
+ workflows, and model-based processing.
10
11
 
11
12
  It also plugs into the Macrodata platform, which gives you visibility into what is happening to your data while pipelines run: job and shard lifecycle, logs, metrics, manifests, and pipeline behavior. The same code can run locally for development and then scale out through Macrodata's elastic serverless cloud.
12
13
 
@@ -45,7 +46,7 @@ import refiner as mdr
45
46
  pad_frames=5,
46
47
  )
47
48
  )
48
- .write_lerobot("hf://buckets/macrodata/test_bucket/aloha_motion")
49
+ .write_lerobot("hf://buckets/acme-robotics/aloha_motion")
49
50
  .launch_cloud(
50
51
  name="motion_trim",
51
52
  num_workers=4,
@@ -53,7 +54,7 @@ import refiner as mdr
53
54
  )
54
55
  ```
55
56
 
56
- Need cloud GPUs? See [Launchers](docs/launchers.md) for the GPU-specific cloud options.
57
+ Need cloud GPUs? See [Resources, GPUs, and Services](docs/running-pipelines/resources-gpus-and-services.md).
57
58
 
58
59
  ### Local example
59
60
 
@@ -92,31 +93,33 @@ def add_preview(row):
92
93
 
93
94
  - training-data-first pipeline primitives instead of generic ETL abstractions
94
95
  - multimodal processing, with robotics support today
95
- - a lot of built-in readers, transforms, sinks, and lifecycle/runtime machinery so you do not have to rebuild the same scaffolding in scripts
96
+ - built-in readers, transforms, sinks, and runtime machinery for common dataset work
96
97
  - access to any storage backend supported by `fsspec` (S3, GCP, Hugging Face, etc.)
97
98
  - local execution for development and elastic cloud execution for large runs
98
- - built-in observability through the Macrodata platform, so you can inspect how your data is changing instead of debugging blindly after the fact
99
+ - built-in observability through the Macrodata platform for job state, logs, metrics, and manifests
99
100
 
100
101
  ## Docs
101
102
 
102
- Getting started:
103
+ Start here:
103
104
 
104
- - [Pipeline basics](docs/pipeline-basics.md)
105
- - [Launchers](docs/launchers.md)
106
- - [CLI](docs/cli.md)
105
+ - [Docs index](docs/index.md)
106
+ - [Quickstart](docs/quickstart.md)
107
+ - [Running pipelines](docs/running-pipelines/index.md)
107
108
 
108
- Core concepts:
109
+ Build a dataset:
109
110
 
110
- - [Reading and writing data](docs/reading-and-writing.md)
111
- - [Transforms](docs/transforms.md)
112
- - [Expressions](docs/expressions.md)
113
- - [In-process debugging](docs/in-process-debugging.md)
114
- - [Task pipelines](docs/task-pipelines.md)
111
+ - [Reading data](docs/reading-data/index.md)
112
+ - [Episode data](docs/episode-data/index.md)
113
+ - [Transforms](docs/transforms/index.md)
114
+ - [Episode operations](docs/episode-operations/index.md)
115
+ - [Writing data](docs/writing-data/index.md)
116
+ - [Examples](docs/examples/index.md)
115
117
 
116
- Modalities and platform:
118
+ Operate jobs:
117
119
 
118
- - [Robotics](docs/robotics.md)
119
- - [Observability](docs/observability.md)
120
+ - [Platform](docs/platform/index.md)
121
+ - [CLI](docs/cli/index.md)
122
+ - [Reference](docs/reference/index.md)
120
123
 
121
124
  ## Community
122
125
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "macrodata-refiner"
3
- version = "0.2.2"
3
+ version = "0.3.0"
4
4
  description = "Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets"
5
5
  readme = "README.md"
6
6
  license = "Apache-2.0"
@@ -17,37 +17,68 @@ authors = [
17
17
  requires-python = ">=3.10"
18
18
  dependencies = [
19
19
  "cloudpickle==3.1.2",
20
- "fsspec",
20
+ "fsspec[http]",
21
21
  "httpx",
22
22
  "loguru",
23
- "opentelemetry-exporter-otlp-proto-http",
24
- "opentelemetry-sdk",
25
23
  "numpy",
26
- "psutil",
27
24
  "orjson",
25
+ "packaging",
28
26
  "pyarrow",
29
27
  "msgspec>=0.20.0",
28
+ "pydantic>=2.0.0",
30
29
  ]
31
30
 
32
31
  [project.optional-dependencies]
33
32
  video = [
34
33
  "av",
34
+ "pillow",
35
35
  ]
36
- robotics = [
37
- "macrodata-refiner[video]",
36
+ hf = [
37
+ "datasets>=3.0.0",
38
38
  "huggingface-hub>=1.4.1",
39
39
  "hf>=1.7.1",
40
40
  ]
41
+ egocentric = [
42
+ "macrodata-refiner[hf]",
43
+ "macrodata-refiner[video]",
44
+ "ego-vision[models]>=0.1.8",
45
+ ]
41
46
  text = [
42
47
  "warcio",
43
48
  ]
49
+ hdf5 = [
50
+ "h5py",
51
+ ]
52
+ zarr = [
53
+ "zarr>=2.18,<3",
54
+ "numcodecs<0.16",
55
+ ]
56
+ mcap = [
57
+ "av",
58
+ "mcap",
59
+ "mcap-protobuf-support",
60
+ "mcap-ros2-support",
61
+ "pillow",
62
+ ]
44
63
  s3 = [
45
64
  "s3fs",
46
65
  ]
66
+ tensorflow = [
67
+ "tensorflow",
68
+ ]
69
+ tfds = [
70
+ "macrodata-refiner[tensorflow]",
71
+ "tensorflow-datasets",
72
+ ]
47
73
  testing = [
48
- "macrodata-refiner[robotics]",
74
+ "macrodata-refiner[hdf5]",
75
+ "macrodata-refiner[hf]",
76
+ "macrodata-refiner[mcap]",
77
+ "macrodata-refiner[video]",
78
+ "macrodata-refiner[zarr]",
49
79
  "macrodata-refiner[text]",
50
80
  "macrodata-refiner[s3]",
81
+ "macrodata-refiner[tfds]",
51
82
  "pytest>=8.0.0",
52
83
  "pytest-cov>=5.0.0",
53
84
  ]
@@ -57,6 +88,7 @@ all = [
57
88
 
58
89
  [project.scripts]
59
90
  macrodata = "refiner.cli.main:main"
91
+ mdr = "refiner.cli.main:main"
60
92
 
61
93
  [build-system]
62
94
  requires = ["setuptools>=77", "wheel"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: macrodata-refiner
3
- Version: 0.2.2
3
+ Version: 0.3.0
4
4
  Summary: Refiner by Macrodata Labs, a data processing framework for Machine Learning large scale datasets
5
5
  Author: Macrodata Labs
6
6
  License-Expression: Apache-2.0
@@ -13,30 +13,55 @@ Requires-Python: >=3.10
13
13
  Description-Content-Type: text/markdown
14
14
  License-File: LICENSE
15
15
  Requires-Dist: cloudpickle==3.1.2
16
- Requires-Dist: fsspec
16
+ Requires-Dist: fsspec[http]
17
17
  Requires-Dist: httpx
18
18
  Requires-Dist: loguru
19
- Requires-Dist: opentelemetry-exporter-otlp-proto-http
20
- Requires-Dist: opentelemetry-sdk
21
19
  Requires-Dist: numpy
22
- Requires-Dist: psutil
23
20
  Requires-Dist: orjson
21
+ Requires-Dist: packaging
24
22
  Requires-Dist: pyarrow
25
23
  Requires-Dist: msgspec>=0.20.0
24
+ Requires-Dist: pydantic>=2.0.0
26
25
  Provides-Extra: video
27
26
  Requires-Dist: av; extra == "video"
28
- Provides-Extra: robotics
29
- Requires-Dist: macrodata-refiner[video]; extra == "robotics"
30
- Requires-Dist: huggingface-hub>=1.4.1; extra == "robotics"
31
- Requires-Dist: hf>=1.7.1; extra == "robotics"
27
+ Requires-Dist: pillow; extra == "video"
28
+ Provides-Extra: hf
29
+ Requires-Dist: datasets>=3.0.0; extra == "hf"
30
+ Requires-Dist: huggingface-hub>=1.4.1; extra == "hf"
31
+ Requires-Dist: hf>=1.7.1; extra == "hf"
32
+ Provides-Extra: egocentric
33
+ Requires-Dist: macrodata-refiner[hf]; extra == "egocentric"
34
+ Requires-Dist: macrodata-refiner[video]; extra == "egocentric"
35
+ Requires-Dist: ego-vision[models]>=0.1.8; extra == "egocentric"
32
36
  Provides-Extra: text
33
37
  Requires-Dist: warcio; extra == "text"
38
+ Provides-Extra: hdf5
39
+ Requires-Dist: h5py; extra == "hdf5"
40
+ Provides-Extra: zarr
41
+ Requires-Dist: zarr<3,>=2.18; extra == "zarr"
42
+ Requires-Dist: numcodecs<0.16; extra == "zarr"
43
+ Provides-Extra: mcap
44
+ Requires-Dist: av; extra == "mcap"
45
+ Requires-Dist: mcap; extra == "mcap"
46
+ Requires-Dist: mcap-protobuf-support; extra == "mcap"
47
+ Requires-Dist: mcap-ros2-support; extra == "mcap"
48
+ Requires-Dist: pillow; extra == "mcap"
34
49
  Provides-Extra: s3
35
50
  Requires-Dist: s3fs; extra == "s3"
51
+ Provides-Extra: tensorflow
52
+ Requires-Dist: tensorflow; extra == "tensorflow"
53
+ Provides-Extra: tfds
54
+ Requires-Dist: macrodata-refiner[tensorflow]; extra == "tfds"
55
+ Requires-Dist: tensorflow-datasets; extra == "tfds"
36
56
  Provides-Extra: testing
37
- Requires-Dist: macrodata-refiner[robotics]; extra == "testing"
57
+ Requires-Dist: macrodata-refiner[hdf5]; extra == "testing"
58
+ Requires-Dist: macrodata-refiner[hf]; extra == "testing"
59
+ Requires-Dist: macrodata-refiner[mcap]; extra == "testing"
60
+ Requires-Dist: macrodata-refiner[video]; extra == "testing"
61
+ Requires-Dist: macrodata-refiner[zarr]; extra == "testing"
38
62
  Requires-Dist: macrodata-refiner[text]; extra == "testing"
39
63
  Requires-Dist: macrodata-refiner[s3]; extra == "testing"
64
+ Requires-Dist: macrodata-refiner[tfds]; extra == "testing"
40
65
  Requires-Dist: pytest>=8.0.0; extra == "testing"
41
66
  Requires-Dist: pytest-cov>=5.0.0; extra == "testing"
42
67
  Provides-Extra: all
@@ -49,9 +74,10 @@ Dynamic: license-file
49
74
 
50
75
  <h1 align="center">Macrodata Refiner</h1>
51
76
 
52
- Refiner is an open-source engine for turning raw, unstructured, and multimodal data into **high-quality datasets** for large model training.
77
+ Refiner is an open-source engine for turning raw robotics and multimodal data into **high-quality datasets** for model training.
53
78
 
54
- It replaces the brittle scripts and stitched-together data tooling that teams still use for training data work, while offering much better support for multimodal data, robotics workflows, and model-based processing.
79
+ It gives training-data teams one pipeline model for multimodal data, robotics
80
+ workflows, and model-based processing.
55
81
 
56
82
  It also plugs into the Macrodata platform, which gives you visibility into what is happening to your data while pipelines run: job and shard lifecycle, logs, metrics, manifests, and pipeline behavior. The same code can run locally for development and then scale out through Macrodata's elastic serverless cloud.
57
83
 
@@ -90,7 +116,7 @@ import refiner as mdr
90
116
  pad_frames=5,
91
117
  )
92
118
  )
93
- .write_lerobot("hf://buckets/macrodata/test_bucket/aloha_motion")
119
+ .write_lerobot("hf://buckets/acme-robotics/aloha_motion")
94
120
  .launch_cloud(
95
121
  name="motion_trim",
96
122
  num_workers=4,
@@ -98,7 +124,7 @@ import refiner as mdr
98
124
  )
99
125
  ```
100
126
 
101
- Need cloud GPUs? See [Launchers](docs/launchers.md) for the GPU-specific cloud options.
127
+ Need cloud GPUs? See [Resources, GPUs, and Services](docs/running-pipelines/resources-gpus-and-services.md).
102
128
 
103
129
  ### Local example
104
130
 
@@ -137,31 +163,33 @@ def add_preview(row):
137
163
 
138
164
  - training-data-first pipeline primitives instead of generic ETL abstractions
139
165
  - multimodal processing, with robotics support today
140
- - a lot of built-in readers, transforms, sinks, and lifecycle/runtime machinery so you do not have to rebuild the same scaffolding in scripts
166
+ - built-in readers, transforms, sinks, and runtime machinery for common dataset work
141
167
  - access to any storage backend supported by `fsspec` (S3, GCS, Hugging Face, etc.)
142
168
  - local execution for development and elastic cloud execution for large runs
143
- - built-in observability through the Macrodata platform, so you can inspect how your data is changing instead of debugging blindly after the fact
169
+ - built-in observability through the Macrodata platform for job state, logs, metrics, and manifests
144
170
 
145
171
  ## Docs
146
172
 
147
- Getting started:
173
+ Start here:
148
174
 
149
- - [Pipeline basics](docs/pipeline-basics.md)
150
- - [Launchers](docs/launchers.md)
151
- - [CLI](docs/cli.md)
175
+ - [Docs index](docs/index.md)
176
+ - [Quickstart](docs/quickstart.md)
177
+ - [Running pipelines](docs/running-pipelines/index.md)
152
178
 
153
- Core concepts:
179
+ Build a dataset:
154
180
 
155
- - [Reading and writing data](docs/reading-and-writing.md)
156
- - [Transforms](docs/transforms.md)
157
- - [Expressions](docs/expressions.md)
158
- - [In-process debugging](docs/in-process-debugging.md)
159
- - [Task pipelines](docs/task-pipelines.md)
181
+ - [Reading data](docs/reading-data/index.md)
182
+ - [Episode data](docs/episode-data/index.md)
183
+ - [Transforms](docs/transforms/index.md)
184
+ - [Episode operations](docs/episode-operations/index.md)
185
+ - [Writing data](docs/writing-data/index.md)
186
+ - [Examples](docs/examples/index.md)
160
187
 
161
- Modalities and platform:
188
+ Operate jobs:
162
189
 
163
- - [Robotics](docs/robotics.md)
164
- - [Observability](docs/observability.md)
190
+ - [Platform](docs/platform/index.md)
191
+ - [CLI](docs/cli/index.md)
192
+ - [Reference](docs/reference/index.md)
165
193
 
166
194
  ## Community
167
195
 
@@ -8,11 +8,37 @@ src/macrodata_refiner.egg-info/entry_points.txt
8
8
  src/macrodata_refiner.egg-info/requires.txt
9
9
  src/macrodata_refiner.egg-info/top_level.txt
10
10
  src/refiner/__init__.py
11
+ src/refiner/job_urls.py
11
12
  src/refiner/py.typed
12
13
  src/refiner/cli/__init__.py
13
14
  src/refiner/cli/auth.py
15
+ src/refiner/cli/common.py
14
16
  src/refiner/cli/main.py
15
- src/refiner/cli/ui.py
17
+ src/refiner/cli/secrets.py
18
+ src/refiner/cli/commands/__init__.py
19
+ src/refiner/cli/commands/auth.py
20
+ src/refiner/cli/commands/jobs.py
21
+ src/refiner/cli/commands/run.py
22
+ src/refiner/cli/commands/secrets.py
23
+ src/refiner/cli/jobs/__init__.py
24
+ src/refiner/cli/jobs/attach.py
25
+ src/refiner/cli/jobs/common.py
26
+ src/refiner/cli/jobs/control.py
27
+ src/refiner/cli/jobs/follow.py
28
+ src/refiner/cli/jobs/get.py
29
+ src/refiner/cli/jobs/list.py
30
+ src/refiner/cli/jobs/logs.py
31
+ src/refiner/cli/jobs/manifest.py
32
+ src/refiner/cli/jobs/metrics.py
33
+ src/refiner/cli/jobs/workers.py
34
+ src/refiner/cli/run/__init__.py
35
+ src/refiner/cli/run/cloud.py
36
+ src/refiner/cli/run/command.py
37
+ src/refiner/cli/run/local.py
38
+ src/refiner/cli/run/modes.py
39
+ src/refiner/cli/ui/__init__.py
40
+ src/refiner/cli/ui/console.py
41
+ src/refiner/cli/ui/terminal.py
16
42
  src/refiner/execution/__init__.py
17
43
  src/refiner/execution/buffer.py
18
44
  src/refiner/execution/engine.py
@@ -24,6 +50,25 @@ src/refiner/execution/operators/row.py
24
50
  src/refiner/execution/operators/vectorized.py
25
51
  src/refiner/execution/tracking/__init__.py
26
52
  src/refiner/execution/tracking/shards.py
53
+ src/refiner/inference/__init__.py
54
+ src/refiner/inference/capabilities.py
55
+ src/refiner/inference/generate_pooling.py
56
+ src/refiner/inference/generate_text.py
57
+ src/refiner/inference/types.py
58
+ src/refiner/inference/internal/__init__.py
59
+ src/refiner/inference/internal/media.py
60
+ src/refiner/inference/internal/message_conversion.py
61
+ src/refiner/inference/internal/response.py
62
+ src/refiner/inference/internal/runtime.py
63
+ src/refiner/inference/internal/schema.py
64
+ src/refiner/inference/internal/transport.py
65
+ src/refiner/inference/internal/usage.py
66
+ src/refiner/inference/providers/__init__.py
67
+ src/refiner/inference/providers/anthropic.py
68
+ src/refiner/inference/providers/base.py
69
+ src/refiner/inference/providers/google.py
70
+ src/refiner/inference/providers/openai.py
71
+ src/refiner/inference/providers/warnings.py
27
72
  src/refiner/io/__init__.py
28
73
  src/refiner/io/datafile.py
29
74
  src/refiner/io/datafolder.py
@@ -32,21 +77,29 @@ src/refiner/launchers/__init__.py
32
77
  src/refiner/launchers/base.py
33
78
  src/refiner/launchers/cloud.py
34
79
  src/refiner/launchers/local.py
80
+ src/refiner/launchers/secrets.py
35
81
  src/refiner/pipeline/__init__.py
36
82
  src/refiner/pipeline/expressions.py
37
83
  src/refiner/pipeline/pipeline.py
38
84
  src/refiner/pipeline/planning.py
85
+ src/refiner/pipeline/resources.py
39
86
  src/refiner/pipeline/steps.py
40
87
  src/refiner/pipeline/data/block.py
88
+ src/refiner/pipeline/data/datatype.py
41
89
  src/refiner/pipeline/data/row.py
42
90
  src/refiner/pipeline/data/shard.py
43
91
  src/refiner/pipeline/data/tabular.py
44
92
  src/refiner/pipeline/sinks/__init__.py
93
+ src/refiner/pipeline/sinks/assets.py
45
94
  src/refiner/pipeline/sinks/base.py
46
95
  src/refiner/pipeline/sinks/jsonl.py
47
96
  src/refiner/pipeline/sinks/lerobot.py
48
- src/refiner/pipeline/sinks/lerobot_reducer.py
49
97
  src/refiner/pipeline/sinks/parquet.py
98
+ src/refiner/pipeline/sinks/zarr.py
99
+ src/refiner/pipeline/sinks/reducer/__init__.py
100
+ src/refiner/pipeline/sinks/reducer/file.py
101
+ src/refiner/pipeline/sinks/reducer/lerobot.py
102
+ src/refiner/pipeline/sinks/reducer/zarr.py
50
103
  src/refiner/pipeline/sources/__init__.py
51
104
  src/refiner/pipeline/sources/base.py
52
105
  src/refiner/pipeline/sources/items.py
@@ -54,10 +107,17 @@ src/refiner/pipeline/sources/task.py
54
107
  src/refiner/pipeline/sources/readers/__init__.py
55
108
  src/refiner/pipeline/sources/readers/base.py
56
109
  src/refiner/pipeline/sources/readers/csv.py
57
- src/refiner/pipeline/sources/readers/jsonl.py
110
+ src/refiner/pipeline/sources/readers/files.py
111
+ src/refiner/pipeline/sources/readers/hdf5.py
112
+ src/refiner/pipeline/sources/readers/hf_dataset.py
113
+ src/refiner/pipeline/sources/readers/json.py
58
114
  src/refiner/pipeline/sources/readers/lerobot.py
115
+ src/refiner/pipeline/sources/readers/mcap.py
59
116
  src/refiner/pipeline/sources/readers/parquet.py
117
+ src/refiner/pipeline/sources/readers/tfds.py
118
+ src/refiner/pipeline/sources/readers/tfrecord.py
60
119
  src/refiner/pipeline/sources/readers/utils.py
120
+ src/refiner/pipeline/sources/readers/zarr.py
61
121
  src/refiner/pipeline/utils/__init__.py
62
122
  src/refiner/pipeline/utils/cache/__init__.py
63
123
  src/refiner/pipeline/utils/cache/decoder_cache.py
@@ -68,11 +128,16 @@ src/refiner/platform/auth.py
68
128
  src/refiner/platform/manifest.py
69
129
  src/refiner/platform/client/__init__.py
70
130
  src/refiner/platform/client/api.py
71
- src/refiner/platform/client/http.py
72
131
  src/refiner/platform/client/models.py
73
132
  src/refiner/platform/client/serialize.py
74
133
  src/refiner/robotics/__init__.py
134
+ src/refiner/robotics/egocentric.py
75
135
  src/refiner/robotics/motion.py
136
+ src/refiner/robotics/reward.py
137
+ src/refiner/robotics/row.py
138
+ src/refiner/robotics/subtask_annotation.py
139
+ src/refiner/robotics/synchronization.py
140
+ src/refiner/robotics/tabular.py
76
141
  src/refiner/robotics/lerobot_format/__init__.py
77
142
  src/refiner/robotics/lerobot_format/row.py
78
143
  src/refiner/robotics/lerobot_format/tabular.py
@@ -81,11 +146,17 @@ src/refiner/robotics/lerobot_format/metadata/info.py
81
146
  src/refiner/robotics/lerobot_format/metadata/metadata.py
82
147
  src/refiner/robotics/lerobot_format/metadata/stats.py
83
148
  src/refiner/robotics/lerobot_format/metadata/tasks.py
149
+ src/refiner/services/__init__.py
150
+ src/refiner/services/base.py
151
+ src/refiner/services/discovery.py
152
+ src/refiner/services/manager.py
153
+ src/refiner/services/vllm.py
84
154
  src/refiner/text/__init__.py
85
155
  src/refiner/text/commoncrawl.py
86
156
  src/refiner/utils/__init__.py
87
157
  src/refiner/utils/imports.py
88
158
  src/refiner/video/__init__.py
159
+ src/refiner/video/decode.py
89
160
  src/refiner/video/remux.py
90
161
  src/refiner/video/transcode.py
91
162
  src/refiner/video/types.py
@@ -93,25 +164,17 @@ src/refiner/video/writer.py
93
164
  src/refiner/worker/__init__.py
94
165
  src/refiner/worker/context.py
95
166
  src/refiner/worker/entrypoint.py
167
+ src/refiner/worker/lifecycle.py
96
168
  src/refiner/worker/runner.py
97
169
  src/refiner/worker/workdir.py
98
- src/refiner/worker/lifecycle/__init__.py
99
- src/refiner/worker/lifecycle/base.py
100
- src/refiner/worker/lifecycle/platform.py
101
- src/refiner/worker/lifecycle/local/__init__.py
102
- src/refiner/worker/lifecycle/local/claim.py
103
- src/refiner/worker/lifecycle/local/files.py
104
- src/refiner/worker/lifecycle/local/lifecycle.py
105
170
  src/refiner/worker/metrics/__init__.py
106
171
  src/refiner/worker/metrics/api.py
107
- src/refiner/worker/metrics/context.py
108
- src/refiner/worker/metrics/otel.py
172
+ src/refiner/worker/metrics/emitter.py
109
173
  src/refiner/worker/resources/__init__.py
110
174
  src/refiner/worker/resources/cpu.py
111
175
  src/refiner/worker/resources/gpu.py
112
- src/refiner/worker/resources/memory.py
113
- src/refiner/worker/resources/network.py
114
176
  tests/test_cache.py
115
177
  tests/test_commoncrawl_text.py
116
178
  tests/test_expressions.py
117
- tests/test_optional_dependencies.py
179
+ tests/test_optional_dependencies.py
180
+ tests/test_video_decode.py
@@ -1,2 +1,3 @@
1
1
  [console_scripts]
2
2
  macrodata = refiner.cli.main:main
3
+ mdr = refiner.cli.main:main