fairspec-dataset 0.0.0.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. fairspec_dataset-0.0.0.dev0/.gitignore +229 -0
  2. fairspec_dataset-0.0.0.dev0/PKG-INFO +20 -0
  3. fairspec_dataset-0.0.0.dev0/README.md +3 -0
  4. fairspec_dataset-0.0.0.dev0/fairspec_dataset/__init__.py +92 -0
  5. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/dataset/basepath.py +62 -0
  6. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/dataset/basepath_spec.py +60 -0
  7. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/dataset/merge.py +23 -0
  8. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/file/copy.py +14 -0
  9. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/file/copy_spec.py +69 -0
  10. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/file/describe.py +23 -0
  11. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/file/fixtures/generated/TestPrefetchFiles.test_prefetches_remote_file.yaml +68 -0
  12. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/file/fixtures/generated/TestPrefetchFiles.test_prefetches_remote_file_with_max_bytes.yaml +68 -0
  13. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/file/infer.py +96 -0
  14. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/file/infer_spec.py +174 -0
  15. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/file/load.py +8 -0
  16. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/file/path.py +12 -0
  17. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/file/prefetch.py +35 -0
  18. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/file/prefetch_spec.py +38 -0
  19. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/file/save.py +9 -0
  20. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/file/temp.py +49 -0
  21. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/file/validate.py +43 -0
  22. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/file/validate_spec.py +215 -0
  23. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/folder/create.py +7 -0
  24. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/folder/create_spec.py +51 -0
  25. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/folder/temp.py +21 -0
  26. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/resource/save.py +94 -0
  27. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/resource/save_spec.py +137 -0
  28. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/stream/concat.py +11 -0
  29. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/stream/concat_spec.py +70 -0
  30. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/stream/load.py +56 -0
  31. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/stream/load_spec.py +69 -0
  32. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/stream/save.py +20 -0
  33. fairspec_dataset-0.0.0.dev0/fairspec_dataset/actions/stream/save_spec.py +73 -0
  34. fairspec_dataset-0.0.0.dev0/fairspec_dataset/conftest.py +10 -0
  35. fairspec_dataset-0.0.0.dev0/fairspec_dataset/models/__init__.py +0 -0
  36. fairspec_dataset-0.0.0.dev0/fairspec_dataset/models/dataset.py +14 -0
  37. fairspec_dataset-0.0.0.dev0/fairspec_dataset/models/file.py +11 -0
  38. fairspec_dataset-0.0.0.dev0/fairspec_dataset/models/file_dialect.py +7 -0
  39. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugin.py +31 -0
  40. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/__init__.py +0 -0
  41. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/__init__.py +9 -0
  42. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/actions/__init__.py +1 -0
  43. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/actions/dataset/__init__.py +1 -0
  44. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/actions/dataset/fixtures/ckan-dataset.json +308 -0
  45. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/actions/dataset/from_ckan.py +97 -0
  46. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/actions/dataset/from_ckan_spec.py +101 -0
  47. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/actions/dataset/load.py +89 -0
  48. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/actions/dataset/save.py +122 -0
  49. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/actions/dataset/to_ckan.py +94 -0
  50. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/actions/dataset/to_ckan_spec.py +169 -0
  51. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/actions/resource/__init__.py +1 -0
  52. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/actions/resource/from_ckan.py +79 -0
  53. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/actions/resource/to_ckan.py +59 -0
  54. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/actions/table_schema/__init__.py +1 -0
  55. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/actions/table_schema/fixtures/ckan-schema.json +115 -0
  56. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/actions/table_schema/from_ckan.py +118 -0
  57. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/actions/table_schema/from_ckan_spec.py +150 -0
  58. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/actions/table_schema/to_ckan.py +65 -0
  59. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/actions/table_schema/to_ckan_spec.py +227 -0
  60. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/models/__init__.py +1 -0
  61. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/models/dataset.py +27 -0
  62. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/models/field.py +15 -0
  63. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/models/organization.py +10 -0
  64. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/models/resource.py +24 -0
  65. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/models/schema.py +9 -0
  66. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/models/tag.py +9 -0
  67. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/plugin.py +24 -0
  68. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/plugin_spec.py +70 -0
  69. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/services/__init__.py +1 -0
  70. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/ckan/services/ckan.py +55 -0
  71. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/descriptor/__init__.py +3 -0
  72. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/descriptor/plugin.py +43 -0
  73. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/descriptor/plugin_spec.py +160 -0
  74. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/folder/__init__.py +9 -0
  75. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/folder/actions/__init__.py +0 -0
  76. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/folder/actions/dataset/__init__.py +0 -0
  77. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/folder/actions/dataset/load.py +10 -0
  78. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/folder/actions/dataset/load_spec.py +154 -0
  79. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/folder/actions/dataset/save.py +61 -0
  80. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/folder/actions/dataset/save_spec.py +257 -0
  81. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/folder/plugin.py +45 -0
  82. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/folder/plugin_spec.py +114 -0
  83. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/github/__init__.py +9 -0
  84. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/github/actions/__init__.py +1 -0
  85. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/github/actions/dataset/__init__.py +1 -0
  86. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/github/actions/dataset/from_github.py +102 -0
  87. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/github/actions/dataset/load.py +61 -0
  88. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/github/actions/dataset/save.py +96 -0
  89. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/github/actions/resource/__init__.py +1 -0
  90. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/github/actions/resource/from_github.py +48 -0
  91. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/github/models/__init__.py +1 -0
  92. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/github/models/file.py +12 -0
  93. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/github/models/license.py +10 -0
  94. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/github/models/owner.py +11 -0
  95. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/github/models/repository.py +32 -0
  96. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/github/plugin.py +25 -0
  97. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/github/plugin_spec.py +59 -0
  98. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/github/services/__init__.py +1 -0
  99. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/github/services/github.py +33 -0
  100. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/__init__.py +9 -0
  101. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/actions/__init__.py +1 -0
  102. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/actions/dataset/__init__.py +1 -0
  103. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/actions/dataset/from_zenodo.py +85 -0
  104. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/actions/dataset/load.py +56 -0
  105. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/actions/dataset/save.py +97 -0
  106. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/actions/dataset/to_zenodo.py +60 -0
  107. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/actions/resource/__init__.py +1 -0
  108. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/actions/resource/from_zenodo.py +35 -0
  109. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/actions/resource/to_zenodo.py +19 -0
  110. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/models/__init__.py +1 -0
  111. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/models/creator.py +9 -0
  112. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/models/file.py +15 -0
  113. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/models/link.py +13 -0
  114. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/models/metadata.py +20 -0
  115. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/models/record.py +16 -0
  116. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/plugin.py +26 -0
  117. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/plugin_spec.py +60 -0
  118. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/services/__init__.py +1 -0
  119. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zenodo/services/zenodo.py +49 -0
  120. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zip/__init__.py +9 -0
  121. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zip/actions/__init__.py +0 -0
  122. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zip/actions/dataset/__init__.py +0 -0
  123. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zip/actions/dataset/load.py +18 -0
  124. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zip/actions/dataset/load_spec.py +130 -0
  125. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zip/actions/dataset/save.py +60 -0
  126. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zip/actions/dataset/save_spec.py +237 -0
  127. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zip/plugin.py +39 -0
  128. fairspec_dataset-0.0.0.dev0/fairspec_dataset/plugins/zip/plugin_spec.py +114 -0
  129. fairspec_dataset-0.0.0.dev0/fairspec_dataset/py.typed +0 -0
  130. fairspec_dataset-0.0.0.dev0/pyproject.toml +40 -0
@@ -0,0 +1,229 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ # Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ # poetry.lock
109
+ # poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ # pdm.lock
116
+ # pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ # pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # Redis
135
+ *.rdb
136
+ *.aof
137
+ *.pid
138
+
139
+ # RabbitMQ
140
+ mnesia/
141
+ rabbitmq/
142
+ rabbitmq-data/
143
+
144
+ # ActiveMQ
145
+ activemq-data/
146
+
147
+ # SageMath parsed files
148
+ *.sage.py
149
+
150
+ # Environments
151
+ .env
152
+ .envrc
153
+ .venv
154
+ env/
155
+ venv/
156
+ ENV/
157
+ env.bak/
158
+ venv.bak/
159
+
160
+ # Spyder project settings
161
+ .spyderproject
162
+ .spyproject
163
+
164
+ # Rope project settings
165
+ .ropeproject
166
+
167
+ # mkdocs documentation
168
+ /site
169
+
170
+ # mypy
171
+ .mypy_cache/
172
+ .dmypy.json
173
+ dmypy.json
174
+
175
+ # Pyre type checker
176
+ .pyre/
177
+
178
+ # pytype static type analyzer
179
+ .pytype/
180
+
181
+ # Cython debug symbols
182
+ cython_debug/
183
+
184
+ # PyCharm
185
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
186
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
187
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
188
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
189
+ # .idea/
190
+
191
+ # Abstra
192
+ # Abstra is an AI-powered process automation framework.
193
+ # Ignore directories containing user credentials, local state, and settings.
194
+ # Learn more at https://abstra.io/docs
195
+ .abstra/
196
+
197
+ # Visual Studio Code
198
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
199
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
200
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
201
+ # you could uncomment the following to ignore the entire vscode folder
202
+ # .vscode/
203
+
204
+ # Ruff stuff:
205
+ .ruff_cache/
206
+
207
+ # PyPI configuration file
208
+ .pypirc
209
+
210
+ # Marimo
211
+ marimo/_static/
212
+ marimo/_lsp/
213
+ __marimo__/
214
+
215
+ # Streamlit
216
+ .streamlit/secrets.toml
217
+
218
+ # Node
219
+ node_modules/
220
+ jspm_packages/
221
+ .lock-wscript
222
+ build/Release
223
+ .node_repl_history
224
+ *.tgz
225
+ .npm
226
+ *.so
227
+
228
+ # User
229
+ /.claude/settings.local.json
@@ -0,0 +1,20 @@
1
+ Metadata-Version: 2.4
2
+ Name: fairspec-dataset
3
+ Version: 0.0.0.dev0
4
+ Summary: Fairspec Python is a fast data management framework built on top of the Fairspec standard and Polars DataFrames
5
+ Project-URL: homepage, https://github.com/fairspec/fairspec-python
6
+ Project-URL: repository, https://github.com/fairspec/fairspec-python
7
+ Author: Evgeny Karev
8
+ License-Expression: MIT
9
+ Keywords: ckan,data,dataframe,datahub,dataset,fair,fairspec,jsonschema,metadata,polars,python,quality,tableschema,validation
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
13
+ Requires-Python: >=3.12
14
+ Requires-Dist: charset-normalizer>=3.0
15
+ Requires-Dist: fairspec-metadata
16
+ Description-Content-Type: text/markdown
17
+
18
+ # fairspec-dataset
19
+
20
+ Fairspec Python is a fast data management framework built on top of the Fairspec standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please read the [project's documentation](https://python.fairspec.org).
@@ -0,0 +1,3 @@
1
+ # fairspec-dataset
2
+
3
+ Fairspec Python is a fast data management framework built on top of the Fairspec standard and Polars DataFrames. It supports various formats like CSV, JSON, and Parquet and integrates with data platforms such as CKAN, Zenodo, and GitHub. For more information, please read the [project's documentation](https://python.fairspec.org).
@@ -0,0 +1,92 @@
1
+ from .actions.dataset.basepath import get_common_local_basepath, get_dataset_basepath
2
+ from .actions.dataset.merge import merge_datasets
3
+ from .actions.file.copy import copy_file
4
+ from .actions.file.describe import describe_file
5
+ from .models.file import FileDescription
6
+ from .actions.file.infer import infer_bytes, infer_hash, infer_integrity, infer_textual
7
+ from .actions.file.load import load_file
8
+ from .actions.file.path import assert_local_path_vacant, get_is_local_path_exist
9
+ from .actions.file.prefetch import prefetch_file, prefetch_files
10
+ from .actions.file.save import save_file
11
+ from .actions.file.temp import get_temp_file_path, write_temp_file
12
+ from .actions.file.validate import validate_file
13
+ from .actions.folder.create import create_folder
14
+ from .actions.folder.temp import get_temp_folder_path
15
+ from .actions.resource.save import SaveFileCallback, SaveFileProps, save_resource_files
16
+ from .actions.stream.concat import concat_file_streams
17
+ from .actions.stream.load import load_file_stream
18
+ from .actions.stream.save import save_file_stream
19
+ from .models.dataset import SaveDatasetOptions
20
+ from .models.file_dialect import InferFileDialectOptions
21
+ from .models.dataset import SaveDatasetResult
22
+ from .plugin import DatasetPlugin
23
+ from .plugins.descriptor import DescriptorPlugin
24
+ from .plugins.ckan import CkanPlugin, load_dataset_from_ckan, save_dataset_to_ckan
25
+ from .plugins.folder import (
26
+ FolderPlugin,
27
+ load_dataset_from_folder,
28
+ save_dataset_to_folder,
29
+ )
30
+ from .plugins.github import (
31
+ GithubPlugin,
32
+ load_dataset_from_github,
33
+ save_dataset_to_github,
34
+ )
35
+ from .plugins.zenodo import (
36
+ ZenodoPlugin,
37
+ load_dataset_from_zenodo,
38
+ save_dataset_to_zenodo,
39
+ )
40
+ from .plugins.zip import ZipPlugin, load_dataset_from_zip, save_dataset_to_zip
41
+ from fairspec_metadata.plugin import MetadataPlugin
42
+
43
+ __all__ = [
44
+ "CkanPlugin",
45
+ "DatasetPlugin",
46
+ "DescriptorPlugin",
47
+ "FileDescription",
48
+ "FolderPlugin",
49
+ "GithubPlugin",
50
+ "InferFileDialectOptions",
51
+ "MetadataPlugin",
52
+ "SaveDatasetOptions",
53
+ "SaveDatasetResult",
54
+ "SaveFileCallback",
55
+ "SaveFileProps",
56
+ "ZenodoPlugin",
57
+ "ZipPlugin",
58
+ "assert_local_path_vacant",
59
+ "concat_file_streams",
60
+ "copy_file",
61
+ "create_folder",
62
+ "describe_file",
63
+ "get_common_local_basepath",
64
+ "get_dataset_basepath",
65
+ "get_is_local_path_exist",
66
+ "get_temp_file_path",
67
+ "get_temp_folder_path",
68
+ "infer_bytes",
69
+ "infer_hash",
70
+ "infer_integrity",
71
+ "infer_textual",
72
+ "load_dataset_from_ckan",
73
+ "load_dataset_from_folder",
74
+ "load_dataset_from_github",
75
+ "load_dataset_from_zenodo",
76
+ "load_dataset_from_zip",
77
+ "load_file",
78
+ "load_file_stream",
79
+ "merge_datasets",
80
+ "prefetch_file",
81
+ "prefetch_files",
82
+ "save_dataset_to_ckan",
83
+ "save_dataset_to_folder",
84
+ "save_dataset_to_github",
85
+ "save_dataset_to_zenodo",
86
+ "save_dataset_to_zip",
87
+ "save_file",
88
+ "save_file_stream",
89
+ "save_resource_files",
90
+ "validate_file",
91
+ "write_temp_file",
92
+ ]
@@ -0,0 +1,62 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from typing import TYPE_CHECKING
5
+
6
+ from fairspec_metadata import get_basepath, get_data_paths, get_is_remote_path
7
+ from fairspec_metadata.actions.path.general import safe_relpath
8
+
9
+ if TYPE_CHECKING:
10
+ from fairspec_metadata import Dataset
11
+
12
+
13
def get_dataset_basepath(dataset: Dataset) -> str | None:
    """Return the common local basepath of every data path in ``dataset``.

    The data paths of each resource (as reported by ``get_data_paths``) are
    gathered into one flat list, which is then handed to
    ``get_common_local_basepath``. A dataset whose ``resources`` is empty or
    ``None`` contributes no paths.
    """
    collected = [
        data_path
        for resource in (dataset.resources or [])
        for data_path in get_data_paths(resource)
    ]
    return get_common_local_basepath(collected)
21
+
22
+
23
+ def get_common_local_basepath(paths: list[str]) -> str | None:
24
+ absolute_basepaths = [
25
+ os.path.abspath(get_basepath(path))
26
+ for path in paths
27
+ if not get_is_remote_path(path)
28
+ ]
29
+
30
+ if not absolute_basepaths:
31
+ return None
32
+
33
+ segment_table = [
34
+ [segment or "/" for segment in path.split(os.sep)]
35
+ for path in absolute_basepaths
36
+ ]
37
+
38
+ column = 0
39
+ segments: list[str] = []
40
+
41
+ while True:
42
+ segment_column = [
43
+ row[column] if column < len(row) else None for row in segment_table
44
+ ]
45
+ unique_segments = set(segment_column)
46
+
47
+ if len(unique_segments) != 1:
48
+ break
49
+ if segment_column[0] is None:
50
+ break
51
+
52
+ column += 1
53
+ segments.append(segment_column[0])
54
+
55
+ if not segments:
56
+ raise ValueError("Cannot find common basepath")
57
+
58
+ if segments[0].endswith(":"):
59
+ segments[0] += os.sep
60
+
61
+ basepath = safe_relpath(os.path.join(*segments))
62
+ return "" if basepath == "." else basepath
@@ -0,0 +1,60 @@
1
+ import pytest
2
+
3
+ from .basepath import get_common_local_basepath
4
+
5
+
6
class TestGetCommonLocalBasepath:
    """Table-driven checks for ``get_common_local_basepath``."""

    @pytest.mark.parametrize(
        "description, paths, expected",
        [
            # Local-only inputs.
            ("same directory different files", ["data/table1.csv", "data/table2.csv"], "data"),
            (
                "nested directories",
                ["data/nested/file1.csv", "data/nested/file2.csv", "data/file3.csv"],
                "data",
            ),
            ("single path", ["data/file.csv"], "data"),
            ("root level files", ["file1.csv", "file2.csv"], ""),
            ("different top-level directories", ["data1/file1.csv", "data2/file2.csv"], ""),
            # No local path to work with.
            ("empty paths array", [], None),
            # Remote entries mixed in or exclusively present.
            ("some paths are remote", ["https://example.com/table.csv", "data/table.csv"], "data"),
            (
                "all paths are remote",
                ["https://example.com/table1.csv", "https://example.com/table2.csv"],
                None,
            ),
        ],
    )
    def test_get_common_local_basepath(self, description, paths, expected):
        # ``description`` only labels the case for the reader; it is not asserted on.
        actual = get_common_local_basepath(paths)
        if expected is not None:
            assert actual == expected
            return
        assert actual is None
@@ -0,0 +1,23 @@
1
+ from __future__ import annotations
2
+
3
+ from fairspec_metadata import load_dataset_descriptor
4
+ from fairspec_metadata import Dataset
5
+
6
+
7
def merge_datasets(
    *,
    system_dataset: Dataset,
    user_dataset_path: str | None = None,
) -> Dataset:
    """Overlay a user-supplied dataset descriptor on top of ``system_dataset``.

    Both datasets are dumped to plain dicts (by alias, without ``None``
    values). Top-level keys from the user descriptor replace the matching
    keys of the system dataset; the merge is shallow. When
    ``user_dataset_path`` is falsy the system dataset is rebuilt unchanged.
    """
    merged = system_dataset.model_dump(by_alias=True, exclude_none=True)

    if user_dataset_path:
        overrides = load_dataset_descriptor(user_dataset_path).model_dump(
            by_alias=True, exclude_none=True
        )
        # User-provided top-level properties win over system ones.
        merged.update(overrides)

    return Dataset(**merged)
@@ -0,0 +1,14 @@
1
+ from __future__ import annotations
2
+
3
+ from fairspec_dataset.actions.stream.load import load_file_stream
4
+ from fairspec_dataset.actions.stream.save import save_file_stream
5
+
6
+
7
def copy_file(
    *,
    source_path: str,
    target_path: str,
    max_bytes: int | None = None,
) -> None:
    """Copy ``source_path`` to ``target_path`` by piping a file stream.

    The source is opened with ``load_file_stream`` (forwarding ``max_bytes``)
    and the resulting stream is written out with ``save_file_stream``.
    """
    save_file_stream(
        load_file_stream(source_path, max_bytes=max_bytes),
        path=target_path,
    )
@@ -0,0 +1,69 @@
1
+ import os
2
+
3
+ from fairspec_dataset.actions.file.temp import write_temp_file
4
+
5
+ from .copy import copy_file
6
+
7
+
8
class TestCopyFile:
    """Round-trip checks for ``copy_file`` using temp sources and ``tmp_path`` targets."""

    @staticmethod
    def _read_text(path):
        # Read the copied file back as UTF-8 text for comparison.
        with open(path, encoding="utf-8") as handle:
            return handle.read()

    def test_copies_file(self, tmp_path):
        origin = write_temp_file("test content")
        destination = str(tmp_path / "target.txt")
        copy_file(source_path=origin, target_path=destination)
        assert os.path.exists(destination)
        assert self._read_text(destination) == "test content"

    def test_copies_exact_content(self, tmp_path):
        text = "Hello, World! This is a test file."
        origin = write_temp_file(text)
        destination = str(tmp_path / "copy.txt")
        copy_file(source_path=origin, target_path=destination)
        assert self._read_text(destination) == text

    def test_copies_binary_file(self, tmp_path):
        payload = bytes([0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10])
        origin = write_temp_file(payload)
        destination = str(tmp_path / "binary.bin")
        copy_file(source_path=origin, target_path=destination)
        with open(destination, "rb") as handle:
            copied = handle.read()
        assert copied == payload

    def test_copies_empty_file(self, tmp_path):
        origin = write_temp_file("")
        destination = str(tmp_path / "empty.txt")
        copy_file(source_path=origin, target_path=destination)
        assert self._read_text(destination) == ""

    def test_copies_large_file(self, tmp_path):
        text = "x" * 100000
        origin = write_temp_file(text)
        destination = str(tmp_path / "large.txt")
        copy_file(source_path=origin, target_path=destination)
        assert self._read_text(destination) == text

    def test_copies_special_characters(self, tmp_path):
        text = "Special characters: é, ñ, ü, ö, à, 中文, 日本語"
        origin = write_temp_file(text)
        destination = str(tmp_path / "special.txt")
        copy_file(source_path=origin, target_path=destination)
        assert self._read_text(destination) == text

    def test_copies_to_nested_directory(self, tmp_path):
        origin = write_temp_file("nested content")
        # Neither "nested" nor "dir" is created by the test before copying.
        destination = str(tmp_path / "nested" / "dir" / "file.txt")
        copy_file(source_path=origin, target_path=destination)
        assert self._read_text(destination) == "nested content"

    def test_copies_with_newlines(self, tmp_path):
        text = "Line 1\nLine 2\nLine 3\n"
        origin = write_temp_file(text)
        destination = str(tmp_path / "multiline.txt")
        copy_file(source_path=origin, target_path=destination)
        assert self._read_text(destination) == text
@@ -0,0 +1,23 @@
1
+ from __future__ import annotations
2
+
3
+ from fairspec_metadata import Resource
4
+
5
+ from fairspec_dataset.models.file import FileDescription
6
+
7
+ from .infer import infer_bytes, infer_integrity, infer_textual
8
+ from .prefetch import prefetch_file
9
+
10
+
11
def describe_file(
    path: str,
    *,
    hash_type: str = "sha256",
) -> FileDescription:
    """Build a ``FileDescription`` for the file at ``path``.

    The path is first passed to ``prefetch_file`` (presumably yielding a
    local copy for remote files; confirm against that helper), wrapped in a
    ``Resource``, and then inspected with ``infer_bytes``, ``infer_textual``
    and ``infer_integrity``.

    Args:
        path: Location of the file to describe.
        hash_type: Hash algorithm name forwarded to ``infer_integrity``.
    """
    resource = Resource(data=prefetch_file(path))

    size = infer_bytes(resource)
    is_textual = infer_textual(resource)
    integrity = infer_integrity(resource, hash_type=hash_type)

    return FileDescription(bytes=size, textual=is_textual, integrity=integrity)
@@ -0,0 +1,68 @@
1
+ interactions:
2
+ - request:
3
+ body: null
4
+ headers:
5
+ Connection:
6
+ - close
7
+ Host:
8
+ - raw.githubusercontent.com
9
+ User-Agent:
10
+ - Python-urllib/3.12
11
+ method: GET
12
+ uri: https://raw.githubusercontent.com/fairspec/fairspec-typescript/refs/heads/main/table/plugins/csv/actions/table/fixtures/table.csv
13
+ response:
14
+ body:
15
+ string: "id,name\n1,english\n2,\u4E2D\u6587\n"
16
+ headers:
17
+ Accept-Ranges:
18
+ - bytes
19
+ Access-Control-Allow-Origin:
20
+ - '*'
21
+ Cache-Control:
22
+ - max-age=300
23
+ Connection:
24
+ - close
25
+ Content-Length:
26
+ - '27'
27
+ Content-Security-Policy:
28
+ - default-src 'none'; style-src 'unsafe-inline'; sandbox
29
+ Content-Type:
30
+ - text/plain; charset=utf-8
31
+ Cross-Origin-Resource-Policy:
32
+ - cross-origin
33
+ Date:
34
+ - Tue, 10 Feb 2026 15:25:06 GMT
35
+ ETag:
36
+ - '"f32e41a0928646326054d6fac4ec63789daf58505efb50237c672db52692e86e"'
37
+ Expires:
38
+ - Tue, 10 Feb 2026 15:30:06 GMT
39
+ Source-Age:
40
+ - '0'
41
+ Strict-Transport-Security:
42
+ - max-age=31536000
43
+ Vary:
44
+ - Authorization,Accept-Encoding
45
+ Via:
46
+ - 1.1 varnish
47
+ X-Cache:
48
+ - MISS
49
+ X-Cache-Hits:
50
+ - '0'
51
+ X-Content-Type-Options:
52
+ - nosniff
53
+ X-Fastly-Request-ID:
54
+ - 53bfbb3d5d4a7589325b5a5e917af77c465ca0d7
55
+ X-Frame-Options:
56
+ - deny
57
+ X-GitHub-Request-Id:
58
+ - 9622:3B543:C6F20F:DB1A96:698B4DD1
59
+ X-Served-By:
60
+ - cache-lis1490031-LIS
61
+ X-Timer:
62
+ - S1770737106.006396,VS0,VE190
63
+ X-XSS-Protection:
64
+ - 1; mode=block
65
+ status:
66
+ code: 200
67
+ message: OK
68
+ version: 1