py-jsonl 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py_jsonl-1.0.0/LICENSE +21 -0
- py_jsonl-1.0.0/PKG-INFO +153 -0
- py_jsonl-1.0.0/README.md +127 -0
- py_jsonl-1.0.0/jsonl.py +173 -0
- py_jsonl-1.0.0/py_jsonl.egg-info/PKG-INFO +153 -0
- py_jsonl-1.0.0/py_jsonl.egg-info/SOURCES.txt +16 -0
- py_jsonl-1.0.0/py_jsonl.egg-info/dependency_links.txt +1 -0
- py_jsonl-1.0.0/py_jsonl.egg-info/not-zip-safe +1 -0
- py_jsonl-1.0.0/py_jsonl.egg-info/top_level.txt +1 -0
- py_jsonl-1.0.0/setup.cfg +4 -0
- py_jsonl-1.0.0/setup.py +42 -0
- py_jsonl-1.0.0/tests/test_dump.py +21 -0
- py_jsonl-1.0.0/tests/test_dump_fork.py +40 -0
- py_jsonl-1.0.0/tests/test_dump_into.py +24 -0
- py_jsonl-1.0.0/tests/test_dumper.py +16 -0
- py_jsonl-1.0.0/tests/test_dumps.py +14 -0
- py_jsonl-1.0.0/tests/test_load.py +33 -0
- py_jsonl-1.0.0/tests/test_load_from.py +35 -0
py_jsonl-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Rolando Morales Perez
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
py_jsonl-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: py-jsonl
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A Python Library for Handling JSON Lines Files
|
|
5
|
+
Home-page: https://github.com/rmoralespp/jsonl
|
|
6
|
+
Author: rmoralespp
|
|
7
|
+
Author-email: rmoralespp@gmail.com
|
|
8
|
+
License: MIT
|
|
9
|
+
Keywords: jsonlines,ndjson,jsonl
|
|
10
|
+
Classifier: Programming Language :: Python
|
|
11
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Intended Audience :: Developers
|
|
18
|
+
Classifier: Intended Audience :: Information Technology
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Classifier: Topic :: Internet
|
|
21
|
+
Classifier: Topic :: Utilities
|
|
22
|
+
Classifier: Topic :: File Formats :: JSON
|
|
23
|
+
Requires-Python: >=3.8
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
|
|
27
|
+
# jsonl
|
|
28
|
+
|
|
29
|
+
[](https://github.com/rmoralespp/jsonl/actions?query=event%3Arelease+workflow%3ACI)
|
|
30
|
+
[](https://pypi.python.org/pypi/jsonl-py)
|
|
31
|
+
[](https://github.com/rmoralespp/jsonl)
|
|
32
|
+
[](https://app.codecov.io/gh/rmoralespp/jsonl)
|
|
33
|
+
[](https://github.com/rmoralespp/jsonl/blob/main/LICENSE)
|
|
34
|
+
[](https://github.com/psf/black)
|
|
35
|
+
[](https://github.com/charliermarsh/ruff)
|
|
36
|
+
|
|
37
|
+
### About
|
|
38
|
+
|
|
39
|
+
jsonl is a Python Library for Handling JSON Lines Files
|
|
40
|
+
|
|
41
|
+
`jsonl` exposes an API similar to the `json` module from the standard library.
|
|
42
|
+
|
|
43
|
+
### Installation (via pip)
|
|
44
|
+
|
|
45
|
+
```pip install jsonl```
|
|
46
|
+
|
|
47
|
+
### Tests
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
(env)$ pip install -r requirements.txt # Ignore this command if it has already been executed
|
|
51
|
+
(env)$ pytest tests/
|
|
52
|
+
(env)$ pytest --cov jsonl # Tests with coverge
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### Usage
|
|
56
|
+
|
|
57
|
+
##### dumps
|
|
58
|
+
```
|
|
59
|
+
dumps(iterable, **kwargs)
|
|
60
|
+
|
|
61
|
+
Serialize iterable to a JSON lines formatted string.
|
|
62
|
+
|
|
63
|
+
:param Iterable[Any] iterable: Iterable of objects
|
|
64
|
+
:param kwargs: `json.dumps` kwargs
|
|
65
|
+
:rtype: str
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
##### dump
|
|
69
|
+
```
|
|
70
|
+
dump(iterable, fp, **kwargs)
|
|
71
|
+
|
|
72
|
+
Serialize iterable as a JSON lines formatted stream to file-like object.
|
|
73
|
+
|
|
74
|
+
:param Iterable[Any] iterable: Iterable of objects
|
|
75
|
+
:param fp: file-like object
|
|
76
|
+
:param kwargs: `json.dumps` kwargs
|
|
77
|
+
|
|
78
|
+
Example:
|
|
79
|
+
import jsonl.dump
|
|
80
|
+
|
|
81
|
+
data = ({'foo': 1}, {'bar': 2})
|
|
82
|
+
with open('myfile.jsonl', mode='w', encoding='utf-8') as file:
|
|
83
|
+
jsonl.dump(data, file)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
##### dump_into
|
|
88
|
+
```
|
|
89
|
+
dump_into(filename, iterable, encoding=utf_8, **kwargs)
|
|
90
|
+
|
|
91
|
+
Dump iterable to a JSON lines file.
|
|
92
|
+
|
|
93
|
+
Example:
|
|
94
|
+
import jsonl.dump_into
|
|
95
|
+
|
|
96
|
+
data = ({'foo': 1}, {'bar': 2})
|
|
97
|
+
jsonl.dump_into("myfile.jsonl", data)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
##### dump_fork
|
|
101
|
+
```
|
|
102
|
+
dump_fork(iterable_by_path, encoding=utf_8, dump_if_empty=True, **kwargs)
|
|
103
|
+
|
|
104
|
+
Incrementally dumps different groups of elements into
|
|
105
|
+
the indicated JSON lines file.
|
|
106
|
+
***Useful to reduce memory consumption***
|
|
107
|
+
|
|
108
|
+
:param Iterable[file_path, Iterable[dict]] iterable_by_path: Group items by file path
|
|
109
|
+
:param encoding: file encoding. 'utf-8' used by default
|
|
110
|
+
:param bool dump_if_empty: If false, don't create an empty JSON lines file.
|
|
111
|
+
:param kwargs: `json.dumps` kwargs
|
|
112
|
+
|
|
113
|
+
Examples:
|
|
114
|
+
import jsonl.dump_fork
|
|
115
|
+
|
|
116
|
+
path_items = (
|
|
117
|
+
("num.jsonl", ({"value": 1}, {"value": 2})),
|
|
118
|
+
("num.jsonl", ({"value": 3},)),
|
|
119
|
+
("foo.jsonl", ({"a": "1"}, {"b": 2})),
|
|
120
|
+
("baz.jsonl", ()),
|
|
121
|
+
)
|
|
122
|
+
jsonl.dump_fork(path_items)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
##### load
|
|
126
|
+
```
|
|
127
|
+
load(fp, **kwargs)
|
|
128
|
+
|
|
129
|
+
Deserialize a file-like object containing JSON Lines into a Python iterable of objects.
|
|
130
|
+
|
|
131
|
+
:param fp: file-like object
|
|
132
|
+
:param kwargs: `json.loads` kwargs
|
|
133
|
+
:rtype: Iterable[Any]
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
##### load_from
|
|
137
|
+
```
|
|
138
|
+
def load_from(filename, encoding=utf_8, **kwargs)
|
|
139
|
+
|
|
140
|
+
Deserialize a JSON Lines file into a Python iterable of objects.
|
|
141
|
+
|
|
142
|
+
:param filename: path
|
|
143
|
+
:param encoding: file encoding. 'utf-8' used by default
|
|
144
|
+
:param kwargs: `json.loads` kwargs
|
|
145
|
+
:rtype: Iterable[str]
|
|
146
|
+
|
|
147
|
+
Examples:
|
|
148
|
+
import jsonl.load_from
|
|
149
|
+
|
|
150
|
+
it = jsonl.load_from("myfile.jsonl")
|
|
151
|
+
next(it)
|
|
152
|
+
|
|
153
|
+
```
|
py_jsonl-1.0.0/README.md
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# jsonl
|
|
2
|
+
|
|
3
|
+
[](https://github.com/rmoralespp/jsonl/actions?query=event%3Arelease+workflow%3ACI)
|
|
4
|
+
[](https://pypi.python.org/pypi/py-jsonl)
|
|
5
|
+
[](https://github.com/rmoralespp/jsonl)
|
|
6
|
+
[](https://app.codecov.io/gh/rmoralespp/jsonl)
|
|
7
|
+
[](https://github.com/rmoralespp/jsonl/blob/main/LICENSE)
|
|
8
|
+
[](https://github.com/psf/black)
|
|
9
|
+
[](https://github.com/charliermarsh/ruff)
|
|
10
|
+
|
|
11
|
+
### About
|
|
12
|
+
|
|
13
|
+
jsonl is a Python Library for Handling JSON Lines Files
|
|
14
|
+
|
|
15
|
+
`jsonl` exposes an API similar to the `json` module from the standard library.
|
|
16
|
+
|
|
17
|
+
### Installation (via pip)
|
|
18
|
+
|
|
19
|
+
```pip install py-jsonl```
|
|
20
|
+
|
|
21
|
+
### Tests
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
(env)$ pip install -r requirements.txt # Ignore this command if it has already been executed
|
|
25
|
+
(env)$ pytest tests/
|
|
26
|
+
(env)$ pytest --cov jsonl # Tests with coverage
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### Usage
|
|
30
|
+
|
|
31
|
+
##### dumps
|
|
32
|
+
```
|
|
33
|
+
dumps(iterable, **kwargs)
|
|
34
|
+
|
|
35
|
+
Serialize iterable to a JSON lines formatted string.
|
|
36
|
+
|
|
37
|
+
:param Iterable[Any] iterable: Iterable of objects
|
|
38
|
+
:param kwargs: `json.dumps` kwargs
|
|
39
|
+
:rtype: str
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
##### dump
|
|
43
|
+
```
|
|
44
|
+
dump(iterable, fp, **kwargs)
|
|
45
|
+
|
|
46
|
+
Serialize iterable as a JSON lines formatted stream to file-like object.
|
|
47
|
+
|
|
48
|
+
:param Iterable[Any] iterable: Iterable of objects
|
|
49
|
+
:param fp: file-like object
|
|
50
|
+
:param kwargs: `json.dumps` kwargs
|
|
51
|
+
|
|
52
|
+
Example:
|
|
53
|
+
import jsonl
|
|
54
|
+
|
|
55
|
+
data = ({'foo': 1}, {'bar': 2})
|
|
56
|
+
with open('myfile.jsonl', mode='w', encoding='utf-8') as file:
|
|
57
|
+
jsonl.dump(data, file)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
##### dump_into
|
|
62
|
+
```
|
|
63
|
+
dump_into(filename, iterable, encoding=utf_8, **kwargs)
|
|
64
|
+
|
|
65
|
+
Dump iterable to a JSON lines file.
|
|
66
|
+
|
|
67
|
+
Example:
|
|
68
|
+
import jsonl
|
|
69
|
+
|
|
70
|
+
data = ({'foo': 1}, {'bar': 2})
|
|
71
|
+
jsonl.dump_into("myfile.jsonl", data)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
##### dump_fork
|
|
75
|
+
```
|
|
76
|
+
dump_fork(iterable_by_path, encoding=utf_8, dump_if_empty=True, **kwargs)
|
|
77
|
+
|
|
78
|
+
Incrementally dumps different groups of elements into
|
|
79
|
+
the indicated JSON lines file.
|
|
80
|
+
***Useful to reduce memory consumption***
|
|
81
|
+
|
|
82
|
+
:param Iterable[file_path, Iterable[dict]] iterable_by_path: Group items by file path
|
|
83
|
+
:param encoding: file encoding. 'utf-8' used by default
|
|
84
|
+
:param bool dump_if_empty: If false, don't create an empty JSON lines file.
|
|
85
|
+
:param kwargs: `json.dumps` kwargs
|
|
86
|
+
|
|
87
|
+
Examples:
|
|
88
|
+
import jsonl
|
|
89
|
+
|
|
90
|
+
path_items = (
|
|
91
|
+
("num.jsonl", ({"value": 1}, {"value": 2})),
|
|
92
|
+
("num.jsonl", ({"value": 3},)),
|
|
93
|
+
("foo.jsonl", ({"a": "1"}, {"b": 2})),
|
|
94
|
+
("baz.jsonl", ()),
|
|
95
|
+
)
|
|
96
|
+
jsonl.dump_fork(path_items)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
##### load
|
|
100
|
+
```
|
|
101
|
+
load(fp, **kwargs)
|
|
102
|
+
|
|
103
|
+
Deserialize a file-like object containing JSON Lines into a Python iterable of objects.
|
|
104
|
+
|
|
105
|
+
:param fp: file-like object
|
|
106
|
+
:param kwargs: `json.loads` kwargs
|
|
107
|
+
:rtype: Iterable[Any]
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
##### load_from
|
|
111
|
+
```
|
|
112
|
+
load_from(filename, encoding=utf_8, **kwargs)
|
|
113
|
+
|
|
114
|
+
Deserialize a JSON Lines file into a Python iterable of objects.
|
|
115
|
+
|
|
116
|
+
:param filename: path
|
|
117
|
+
:param encoding: file encoding. 'utf-8' used by default
|
|
118
|
+
:param kwargs: `json.loads` kwargs
|
|
119
|
+
:rtype: Iterable[Any]
|
|
120
|
+
|
|
121
|
+
Examples:
|
|
122
|
+
import jsonl
|
|
123
|
+
|
|
124
|
+
it = jsonl.load_from("myfile.jsonl")
|
|
125
|
+
next(it)
|
|
126
|
+
|
|
127
|
+
```
|
py_jsonl-1.0.0/jsonl.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
Useful functions for working with JSON lines data as
|
|
5
|
+
described: https://jsonlines.org/
|
|
6
|
+
|
|
7
|
+
`jsonl` exposes an API similar to the `json` module from the standard library.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
__version__ = "1.0.0"
|
|
11
|
+
# Names exported by ``from jsonl import *``; each entry is listed exactly once.
__all__ = [
    "dump",
    "dumps",
    "dump_into",
    "dump_fork",
    "load",
    "load_from",
]
|
|
20
|
+
__title__ = "py-jsonl"
|
|
21
|
+
|
|
22
|
+
import functools
|
|
23
|
+
import json
|
|
24
|
+
import os
|
|
25
|
+
|
|
26
|
+
empty = object()
|
|
27
|
+
dumps_line = functools.partial(json.dumps, ensure_ascii=False)
|
|
28
|
+
utf_8 = "utf-8"
|
|
29
|
+
new_line = "\n"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def dumper(iterable, **kwargs):
    """
    Lazily produce the text chunks of a JSON lines document.

    For each object two strings are yielded: its JSON encoding and then
    the line terminator.

    :param Iterable[Any] iterable: Iterable of objects
    :param kwargs: `json.dumps` kwargs
    """

    to_json = functools.partial(dumps_line, **kwargs)
    for item in iterable:
        yield to_json(item)
        yield new_line
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def dumps(iterable, **kwargs):
    """
    Build one JSON lines formatted string from an iterable of objects.

    :param Iterable[Any] iterable: Iterable of objects
    :param kwargs: `json.dumps` kwargs
    :rtype: str
    """

    chunks = dumper(iterable, **kwargs)
    return "".join(chunks)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def dump(iterable, fp, **kwargs):
    """
    Serialize iterable as a JSON lines formatted stream to file-like object.

    The text chunks are handed to ``fp.writelines`` as they are produced, so
    the iterable is consumed lazily.

    :param Iterable[Any] iterable: Iterable of objects
    :param fp: file-like object exposing ``writelines``
    :param kwargs: `json.dumps` kwargs

    Example:
        import jsonl

        data = ({'foo': 1}, {'bar': 2})
        with open('myfile.jsonl', mode='w', encoding='utf-8') as file:
            jsonl.dump(data, file)
    """

    fp.writelines(dumper(iterable, **kwargs))
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def dump_into(filename, iterable, encoding=utf_8, **kwargs):
    """
    Dump iterable to a JSON lines file.

    The file is opened in write mode, so any existing content is replaced.

    :param filename: path of the file to write
    :param Iterable[Any] iterable: Iterable of objects
    :param encoding: file encoding. 'utf-8' used by default
    :param kwargs: `json.dumps` kwargs

    Example:
        import jsonl

        data = ({'foo': 1}, {'bar': 2})
        jsonl.dump_into("myfile.jsonl", data)
    """

    with open(filename, mode="w", encoding=encoding) as f:
        dump(iterable, f, **kwargs)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def dump_fork(iterable_by_path, encoding=utf_8, dump_if_empty=True, **kwargs):
    """
    Incrementally dumps different groups of elements into
    the indicated JSON lines file.
    ***Useful to reduce memory consumption***

    Each destination file is opened once, the first time its path appears,
    and stays open until all groups have been processed. Items written to the
    same file are separated by a newline; no trailing newline is written.
    Every opened file is closed even if iteration or encoding fails.

    :param Iterable[file_path, Iterable[dict]] iterable_by_path: Group items by file path
    :param encoding: file encoding. 'utf-8' used by default
    :param bool dump_if_empty: If false, don't create an empty JSON lines file.
    :param kwargs: `json.dumps` kwargs

    Examples:
        import jsonl

        path_items = (
            ("num.jsonl", ({"value": 1}, {"value": 2})),
            ("num.jsonl", ({"value": 3},)),
            ("foo.jsonl", ({"a": "1"}, {"b": 2})),
            ("baz.jsonl", ()),
        )
        jsonl.dump_fork(path_items)
    """

    def get_writer(dst):
        # Generator used as a sink: every value sent in is appended to `dst`.
        nothing = True  # stays True until the first item is received
        with open(dst, mode="w", encoding=encoding) as fd:
            try:
                while True:
                    obj = yield
                    if nothing:
                        nothing = False
                    else:
                        # Separator goes *between* items, never after the last.
                        fd.write(new_line)
                    fd.write(encoder(obj))
            except GeneratorExit:
                # Raised by close(); fall through so the file gets closed.
                pass
        # The file is closed at this point, so removing it is safe.
        if nothing and not dump_if_empty:
            os.unlink(dst)

    encoder = functools.partial(dumps_line, **kwargs)
    writers = {}

    try:
        for path, iterable in iterable_by_path:
            writer = writers.get(path)
            if writer is None:
                writer = get_writer(path)
                next(writer)  # advance to the first `yield`; this opens the file
                writers[path] = writer

            for item in iterable:
                writer.send(item)
    finally:
        # Cleanup runs on failure as well, so no file handle is left open.
        for writer in writers.values():
            writer.close()
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def load(fp, **kwargs):
    """
    Lazily decode a file-like object containing JSON Lines.

    Each line obtained by iterating ``fp`` is passed to `json.loads`.

    :param fp: file-like object
    :param kwargs: `json.loads` kwargs
    :rtype: Iterable[Any]
    """

    for line in fp:
        yield json.loads(line, **kwargs)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def load_from(filename, encoding=utf_8, **kwargs):
    """
    Deserialize a JSON Lines file into a Python iterable of objects.

    This is a generator: the file is opened when iteration starts and is
    closed once the generator is exhausted or closed.

    :param filename: file path
    :param encoding: file encoding. 'utf-8' used by default
    :param kwargs: `json.loads` kwargs
    :rtype: Iterable[Any]

    Examples:
        import jsonl

        iterable = jsonl.load_from("myfile.jsonl")
    """

    with open(filename, encoding=encoding) as f:
        yield from load(f, **kwargs)
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: py-jsonl
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A Python Library for Handling JSON Lines Files
|
|
5
|
+
Home-page: https://github.com/rmoralespp/jsonl
|
|
6
|
+
Author: rmoralespp
|
|
7
|
+
Author-email: rmoralespp@gmail.com
|
|
8
|
+
License: MIT
|
|
9
|
+
Keywords: jsonlines,ndjson,jsonl
|
|
10
|
+
Classifier: Programming Language :: Python
|
|
11
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Intended Audience :: Developers
|
|
18
|
+
Classifier: Intended Audience :: Information Technology
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Classifier: Topic :: Internet
|
|
21
|
+
Classifier: Topic :: Utilities
|
|
22
|
+
Classifier: Topic :: File Formats :: JSON
|
|
23
|
+
Requires-Python: >=3.8
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
|
|
27
|
+
# jsonl
|
|
28
|
+
|
|
29
|
+
[](https://github.com/rmoralespp/jsonl/actions?query=event%3Arelease+workflow%3ACI)
|
|
30
|
+
[](https://pypi.python.org/pypi/jsonl-py)
|
|
31
|
+
[](https://github.com/rmoralespp/jsonl)
|
|
32
|
+
[](https://app.codecov.io/gh/rmoralespp/jsonl)
|
|
33
|
+
[](https://github.com/rmoralespp/jsonl/blob/main/LICENSE)
|
|
34
|
+
[](https://github.com/psf/black)
|
|
35
|
+
[](https://github.com/charliermarsh/ruff)
|
|
36
|
+
|
|
37
|
+
### About
|
|
38
|
+
|
|
39
|
+
jsonl is a Python Library for Handling JSON Lines Files
|
|
40
|
+
|
|
41
|
+
`jsonl` exposes an API similar to the `json` module from the standard library.
|
|
42
|
+
|
|
43
|
+
### Installation (via pip)
|
|
44
|
+
|
|
45
|
+
```pip install jsonl```
|
|
46
|
+
|
|
47
|
+
### Tests
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
(env)$ pip install -r requirements.txt # Ignore this command if it has already been executed
|
|
51
|
+
(env)$ pytest tests/
|
|
52
|
+
(env)$ pytest --cov jsonl # Tests with coverge
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### Usage
|
|
56
|
+
|
|
57
|
+
##### dumps
|
|
58
|
+
```
|
|
59
|
+
dumps(iterable, **kwargs)
|
|
60
|
+
|
|
61
|
+
Serialize iterable to a JSON lines formatted string.
|
|
62
|
+
|
|
63
|
+
:param Iterable[Any] iterable: Iterable of objects
|
|
64
|
+
:param kwargs: `json.dumps` kwargs
|
|
65
|
+
:rtype: str
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
##### dump
|
|
69
|
+
```
|
|
70
|
+
dump(iterable, fp, **kwargs)
|
|
71
|
+
|
|
72
|
+
Serialize iterable as a JSON lines formatted stream to file-like object.
|
|
73
|
+
|
|
74
|
+
:param Iterable[Any] iterable: Iterable of objects
|
|
75
|
+
:param fp: file-like object
|
|
76
|
+
:param kwargs: `json.dumps` kwargs
|
|
77
|
+
|
|
78
|
+
Example:
|
|
79
|
+
import jsonl.dump
|
|
80
|
+
|
|
81
|
+
data = ({'foo': 1}, {'bar': 2})
|
|
82
|
+
with open('myfile.jsonl', mode='w', encoding='utf-8') as file:
|
|
83
|
+
jsonl.dump(data, file)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
##### dump_into
|
|
88
|
+
```
|
|
89
|
+
dump_into(filename, iterable, encoding=utf_8, **kwargs)
|
|
90
|
+
|
|
91
|
+
Dump iterable to a JSON lines file.
|
|
92
|
+
|
|
93
|
+
Example:
|
|
94
|
+
import jsonl.dump_into
|
|
95
|
+
|
|
96
|
+
data = ({'foo': 1}, {'bar': 2})
|
|
97
|
+
jsonl.dump_into("myfile.jsonl", data)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
##### dump_fork
|
|
101
|
+
```
|
|
102
|
+
dump_fork(iterable_by_path, encoding=utf_8, dump_if_empty=True, **kwargs)
|
|
103
|
+
|
|
104
|
+
Incrementally dumps different groups of elements into
|
|
105
|
+
the indicated JSON lines file.
|
|
106
|
+
***Useful to reduce memory consumption***
|
|
107
|
+
|
|
108
|
+
:param Iterable[file_path, Iterable[dict]] iterable_by_path: Group items by file path
|
|
109
|
+
:param encoding: file encoding. 'utf-8' used by default
|
|
110
|
+
:param bool dump_if_empty: If false, don't create an empty JSON lines file.
|
|
111
|
+
:param kwargs: `json.dumps` kwargs
|
|
112
|
+
|
|
113
|
+
Examples:
|
|
114
|
+
import jsonl.dump_fork
|
|
115
|
+
|
|
116
|
+
path_items = (
|
|
117
|
+
("num.jsonl", ({"value": 1}, {"value": 2})),
|
|
118
|
+
("num.jsonl", ({"value": 3},)),
|
|
119
|
+
("foo.jsonl", ({"a": "1"}, {"b": 2})),
|
|
120
|
+
("baz.jsonl", ()),
|
|
121
|
+
)
|
|
122
|
+
jsonl.dump_fork(path_items)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
##### load
|
|
126
|
+
```
|
|
127
|
+
load(fp, **kwargs)
|
|
128
|
+
|
|
129
|
+
Deserialize a file-like object containing JSON Lines into a Python iterable of objects.
|
|
130
|
+
|
|
131
|
+
:param fp: file-like object
|
|
132
|
+
:param kwargs: `json.loads` kwargs
|
|
133
|
+
:rtype: Iterable[Any]
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
##### load_from
|
|
137
|
+
```
|
|
138
|
+
def load_from(filename, encoding=utf_8, **kwargs)
|
|
139
|
+
|
|
140
|
+
Deserialize a JSON Lines file into a Python iterable of objects.
|
|
141
|
+
|
|
142
|
+
:param filename: path
|
|
143
|
+
:param encoding: file encoding. 'utf-8' used by default
|
|
144
|
+
:param kwargs: `json.loads` kwargs
|
|
145
|
+
:rtype: Iterable[str]
|
|
146
|
+
|
|
147
|
+
Examples:
|
|
148
|
+
import jsonl.load_from
|
|
149
|
+
|
|
150
|
+
it = jsonl.load_from("myfile.jsonl")
|
|
151
|
+
next(it)
|
|
152
|
+
|
|
153
|
+
```
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
jsonl.py
|
|
4
|
+
setup.py
|
|
5
|
+
py_jsonl.egg-info/PKG-INFO
|
|
6
|
+
py_jsonl.egg-info/SOURCES.txt
|
|
7
|
+
py_jsonl.egg-info/dependency_links.txt
|
|
8
|
+
py_jsonl.egg-info/not-zip-safe
|
|
9
|
+
py_jsonl.egg-info/top_level.txt
|
|
10
|
+
tests/test_dump.py
|
|
11
|
+
tests/test_dump_fork.py
|
|
12
|
+
tests/test_dump_into.py
|
|
13
|
+
tests/test_dumper.py
|
|
14
|
+
tests/test_dumps.py
|
|
15
|
+
tests/test_load.py
|
|
16
|
+
tests/test_load_from.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
jsonl
|
py_jsonl-1.0.0/setup.cfg
ADDED
py_jsonl-1.0.0/setup.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
import setuptools
|
|
4
|
+
|
|
5
|
+
import jsonl
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def read(filename):
    """Return the whole content of ``filename`` decoded as UTF-8 text."""
    with open(filename, encoding="utf-8") as handle:
        content = handle.read()
    return content
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
setuptools.setup(
|
|
14
|
+
name=jsonl.__title__,
|
|
15
|
+
version=jsonl.__version__,
|
|
16
|
+
description="A Python Library for Handling JSON Lines Files",
|
|
17
|
+
long_description=read("README.md"),
|
|
18
|
+
long_description_content_type="text/markdown",
|
|
19
|
+
classifiers=[
|
|
20
|
+
"Programming Language :: Python",
|
|
21
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
22
|
+
"Programming Language :: Python :: 3.8",
|
|
23
|
+
"Programming Language :: Python :: 3.9",
|
|
24
|
+
"Programming Language :: Python :: 3.10",
|
|
25
|
+
"Programming Language :: Python :: 3.11",
|
|
26
|
+
"Programming Language :: Python :: 3.12",
|
|
27
|
+
"Intended Audience :: Developers",
|
|
28
|
+
"Intended Audience :: Information Technology",
|
|
29
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
30
|
+
"Topic :: Internet",
|
|
31
|
+
"Topic :: Utilities",
|
|
32
|
+
"Topic :: File Formats :: JSON",
|
|
33
|
+
],
|
|
34
|
+
keywords=["jsonlines", "ndjson", "jsonl"],
|
|
35
|
+
author="rmoralespp",
|
|
36
|
+
author_email="rmoralespp@gmail.com",
|
|
37
|
+
url="https://github.com/rmoralespp/jsonl",
|
|
38
|
+
license="MIT",
|
|
39
|
+
py_modules=["jsonl"],
|
|
40
|
+
zip_safe=False, # https://mypy.readthedocs.io/en/latest/installed_packages.html
|
|
41
|
+
python_requires=">=3.8",
|
|
42
|
+
)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
import io
|
|
4
|
+
|
|
5
|
+
import jsonl
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_dump_empty():
    """Dumping an empty iterable writes nothing to the stream."""
    buffer = io.StringIO()
    jsonl.dump((), buffer)
    assert buffer.getvalue() == ""
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_dump_iter():
    """Objects taken from an iterator are written one per terminated line."""
    stream = io.StringIO()
    records = iter(({"foo": 1}, {"ño": 2}))
    jsonl.dump(records, stream)
    assert stream.getvalue() == '{"foo": 1}\n{"ño": 2}\n'
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import tempfile
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
import jsonl
|
|
9
|
+
import tests
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_dumped_iter_data():
    """Groups sharing a path land in one file; an empty group gives an empty file."""
    with tempfile.TemporaryDirectory() as workdir:
        foo_file = os.path.join(workdir, "foo.jsonl")
        var_file = os.path.join(workdir, "var.jsonl")
        baz_file = os.path.join(workdir, "baz.jsonl")

        groups = [
            (foo_file, ({"foo": 1}, {"ño": 2})),
            (foo_file, ({"extra": True},)),
            (var_file, ({"foo": 1}, {"ño": 2})),
            (baz_file, ()),
        ]
        jsonl.dump_fork(iter(groups))

        expected_by_path = (
            (foo_file, '{"foo": 1}\n{"ño": 2}\n{"extra": true}'),
            (var_file, '{"foo": 1}\n{"ño": 2}'),
            (baz_file, ""),
        )
        for target, content in expected_by_path:
            assert tests.read(target) == content
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@pytest.mark.parametrize("dump_if_empty", (True, False))
def test_dumped_empty_data(dump_if_empty):
    """An empty group leaves a file behind only when ``dump_if_empty`` is true."""
    with tempfile.TemporaryDirectory() as workdir:
        target = os.path.join(workdir, "foo.jsonl")
        groups = [(target, ())]
        jsonl.dump_fork(iter(groups), dump_if_empty=dump_if_empty)
        if not dump_if_empty:
            assert not os.path.exists(target)
        else:
            assert tests.read(target) == ""
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import tempfile
|
|
5
|
+
|
|
6
|
+
import jsonl
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_exists_file():
    """``dump_into`` creates the target file even when there is nothing to write."""
    with tempfile.TemporaryDirectory() as workdir:
        target = os.path.join(workdir, "foo.jsonl")
        jsonl.dump_into(target, ())
        assert os.path.exists(target)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_dumped_iter_data():
    """Records from an iterator end up in the file, one terminated line each."""
    records = iter(({"foo": 1}, {"ño": 2}))
    with tempfile.TemporaryDirectory() as workdir:
        target = os.path.join(workdir, "foo.jsonl")
        jsonl.dump_into(target, records)
        with open(target, encoding="utf-8") as handle:
            written = handle.read()
    assert written == '{"foo": 1}\n{"ño": 2}\n'
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
import jsonl
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_empty():
    """An empty iterable produces no chunks at all."""
    chunks = tuple(jsonl.dumper(()))
    assert chunks == ()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_no_empty():
    """Each object yields its JSON text followed by a separate newline chunk."""
    records = iter(({"foo": 1}, {"ño": 2}))
    chunks = tuple(jsonl.dumper(records))
    assert chunks == ('{"foo": 1}', "\n", '{"ño": 2}', "\n")
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
import jsonl
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_dumps_empty():
    """Serializing an empty iterable gives a falsy result."""
    serialized = jsonl.dumps(())
    assert not serialized
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def test_dumps_data():
    """Objects are serialized in order, one newline-terminated line each."""
    records = ({"ño": 1}, {"foo": "var"})
    serialized = jsonl.dumps(iter(records))
    assert serialized == '{"ño": 1}\n{"foo": "var"}\n'
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
import io
|
|
4
|
+
import json
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
import jsonl
|
|
9
|
+
import tests
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_invalid_lines():
    """A blank line between records surfaces as ``json.JSONDecodeError``."""
    stream = io.StringIO("[1, 2]\n\n[3]")
    loaded = jsonl.load(stream)
    with pytest.raises(json.JSONDecodeError):
        tests.consume(loaded)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_invalid_utf8() -> None:
    """Bytes that are not valid UTF-8 surface as ``UnicodeDecodeError``."""
    stream = io.BytesIO(b"\xff\xff")
    loaded = jsonl.load(stream)
    with pytest.raises(UnicodeDecodeError):
        tests.consume(loaded)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_load_empty():
    """Loading from an empty stream yields no objects."""
    loaded = tuple(jsonl.load(io.StringIO()))
    assert loaded == ()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def test_load_data():
    """Every line of the stream is decoded into its Python object, in order."""
    stream = io.StringIO('{"foo": 1}\n{"ño": 2}\n')
    loaded = tuple(jsonl.load(stream))
    assert loaded == ({"foo": 1}, {"ño": 2})
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import tempfile
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
import jsonl
|
|
9
|
+
import tests
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_load_empty():
    """A file prepared by ``tests.write(path)`` with no content loads as no objects."""
    with tempfile.TemporaryDirectory() as workdir:
        source = os.path.join(workdir, "foo.jsonl")
        tests.write(source)
        loaded = tuple(jsonl.load_from(source))
        assert loaded == ()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_load_data():
    """Every line of the file is decoded into its Python object, in order."""
    content = '{"foo": 1}\n{"ño": 2}\n'
    with tempfile.TemporaryDirectory() as workdir:
        source = os.path.join(workdir, "foo.jsonl")
        tests.write(source, content)
        loaded = tuple(jsonl.load_from(source))
    assert loaded == ({"foo": 1}, {"ño": 2})
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_file_not_found():
    """Consuming ``load_from`` for a missing path raises ``FileNotFoundError``."""
    loaded = jsonl.load_from("jsonl.json")
    with pytest.raises(FileNotFoundError):
        tests.consume(loaded)
|