py-jsonl 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py_jsonl-1.0.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Rolando Morales Perez
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,153 @@
1
+ Metadata-Version: 2.1
2
+ Name: py-jsonl
3
+ Version: 1.0.0
4
+ Summary: A Python Library for Handling JSON Lines Files
5
+ Home-page: https://github.com/rmoralespp/jsonl
6
+ Author: rmoralespp
7
+ Author-email: rmoralespp@gmail.com
8
+ License: MIT
9
+ Keywords: jsonlines,ndjson,jsonl
10
+ Classifier: Programming Language :: Python
11
+ Classifier: Programming Language :: Python :: 3 :: Only
12
+ Classifier: Programming Language :: Python :: 3.8
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Intended Audience :: Developers
18
+ Classifier: Intended Audience :: Information Technology
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Classifier: Topic :: Internet
21
+ Classifier: Topic :: Utilities
22
+ Classifier: Topic :: File Formats :: JSON
23
+ Requires-Python: >=3.8
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+
27
+ # jsonl
28
+
29
+ [![CI](https://github.com/rmoralespp/jsonl/workflows/CI/badge.svg)](https://github.com/rmoralespp/jsonl/actions?query=event%3Arelease+workflow%3ACI)
30
+ [![pypi](https://img.shields.io/pypi/v/jsonl.svg)](https://pypi.python.org/pypi/jsonl-py)
31
+ [![versions](https://img.shields.io/pypi/pyversions/jsonl.svg)](https://github.com/rmoralespp/jsonl)
32
+ [![codecov](https://codecov.io/gh/rmoralespp/jsonl/branch/main/graph/badge.svg)](https://app.codecov.io/gh/rmoralespp/jsonl)
33
+ [![license](https://img.shields.io/github/license/rmoralespp/jsonl.svg)](https://github.com/rmoralespp/jsonl/blob/main/LICENSE)
34
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
35
+ [![Linter: ruff](https://img.shields.io/badge/linter-_ruff-orange)](https://github.com/charliermarsh/ruff)
36
+
37
+ ### About
38
+
39
+ jsonl is a Python Library for Handling JSON Lines Files
40
+
41
+ `jsonl` exposes an API similar to the `json` module from the standard library.
42
+
43
+ ### Installation (via pip)
44
+
45
+ ```pip install py-jsonl```
46
+
47
+ ### Tests
48
+
49
+ ```
50
+ (env)$ pip install -r requirements.txt # Ignore this command if it has already been executed
51
+ (env)$ pytest tests/
52
+ (env)$ pytest --cov jsonl # Tests with coverage
53
+ ```
54
+
55
+ ### Usage
56
+
57
+ ##### dumps
58
+ ```
59
+ dumps(iterable, **kwargs)
60
+
61
+ Serialize iterable to a JSON lines formatted string.
62
+
63
+ :param Iterable[Any] iterable: Iterable of objects
64
+ :param kwargs: `json.dumps` kwargs
65
+ :rtype: str
66
+ ```
67
+
68
+ ##### dump
69
+ ```
70
+ dump(iterable, fp, **kwargs)
71
+
72
+ Serialize iterable as a JSON lines formatted stream to file-like object.
73
+
74
+ :param Iterable[Any] iterable: Iterable of objects
75
+ :param fp: file-like object
76
+ :param kwargs: `json.dumps` kwargs
77
+
78
+ Example:
79
+ import jsonl.dump
80
+
81
+ data = ({'foo': 1}, {'bar': 2})
82
+ with open('myfile.jsonl', mode='w', encoding='utf-8') as file:
83
+ jsonl.dump(data, file)
84
+ ```
85
+
86
+
87
+ ##### dump_into
88
+ ```
89
+ dump_into(filename, iterable, encoding=utf_8, **kwargs)
90
+
91
+ Dump iterable to a JSON lines file.
92
+
93
+ Example:
94
+ import jsonl.dump_into
95
+
96
+ data = ({'foo': 1}, {'bar': 2})
97
+ jsonl.dump_into("myfile.jsonl", data)
98
+ ```
99
+
100
+ ##### dump_fork
101
+ ```
102
+ dump_fork(iterable_by_path, encoding=utf_8, dump_if_empty=True, **kwargs)
103
+
104
+ Incrementally dumps different groups of elements into
105
+ the indicated JSON lines file.
106
+ ***Useful to reduce memory consumption***
107
+
108
+ :param Iterable[file_path, Iterable[dict]] iterable_by_path: Group items by file path
109
+ :param encoding: file encoding. 'utf-8' used by default
110
+ :param bool dump_if_empty: If false, don't create an empty JSON lines file.
111
+ :param kwargs: `json.dumps` kwargs
112
+
113
+ Examples:
114
+ import jsonl.dump_fork
115
+
116
+ path_items = (
117
+ ("num.jsonl", ({"value": 1}, {"value": 2})),
118
+ ("num.jsonl", ({"value": 3},)),
119
+ ("foo.jsonl", ({"a": "1"}, {"b": 2})),
120
+ ("baz.jsonl", ()),
121
+ )
122
+ jsonl.dump_fork(path_items)
123
+ ```
124
+
125
+ ##### load
126
+ ```
127
+ load(fp, **kwargs)
128
+
129
+ Deserialize a file-like object containing JSON Lines into a Python iterable of objects.
130
+
131
+ :param fp: file-like object
132
+ :param kwargs: `json.loads` kwargs
133
+ :rtype: Iterable[Any]
134
+ ```
135
+
136
+ ##### load_from
137
+ ```
138
+ def load_from(filename, encoding=utf_8, **kwargs)
139
+
140
+ Deserialize a JSON Lines file into a Python iterable of objects.
141
+
142
+ :param filename: path
143
+ :param encoding: file encoding. 'utf-8' used by default
144
+ :param kwargs: `json.loads` kwargs
145
+ :rtype: Iterable[str]
146
+
147
+ Examples:
148
+ import jsonl.load_from
149
+
150
+ it = jsonl.load_from("myfile.jsonl")
151
+ next(it)
152
+
153
+ ```
@@ -0,0 +1,127 @@
1
+ # jsonl
2
+
3
+ [![CI](https://github.com/rmoralespp/jsonl/workflows/CI/badge.svg)](https://github.com/rmoralespp/jsonl/actions?query=event%3Arelease+workflow%3ACI)
4
+ [![pypi](https://img.shields.io/pypi/v/jsonl.svg)](https://pypi.python.org/pypi/jsonl-py)
5
+ [![versions](https://img.shields.io/pypi/pyversions/jsonl.svg)](https://github.com/rmoralespp/jsonl)
6
+ [![codecov](https://codecov.io/gh/rmoralespp/jsonl/branch/main/graph/badge.svg)](https://app.codecov.io/gh/rmoralespp/jsonl)
7
+ [![license](https://img.shields.io/github/license/rmoralespp/jsonl.svg)](https://github.com/rmoralespp/jsonl/blob/main/LICENSE)
8
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
9
+ [![Linter: ruff](https://img.shields.io/badge/linter-_ruff-orange)](https://github.com/charliermarsh/ruff)
10
+
11
+ ### About
12
+
13
+ jsonl is a Python Library for Handling JSON Lines Files
14
+
15
+ `jsonl` exposes an API similar to the `json` module from the standard library.
16
+
17
+ ### Installation (via pip)
18
+
19
+ ```pip install py-jsonl```
20
+
21
+ ### Tests
22
+
23
+ ```
24
+ (env)$ pip install -r requirements.txt # Ignore this command if it has already been executed
25
+ (env)$ pytest tests/
26
+ (env)$ pytest --cov jsonl # Tests with coverage
27
+ ```
28
+
29
+ ### Usage
30
+
31
+ ##### dumps
32
+ ```
33
+ dumps(iterable, **kwargs)
34
+
35
+ Serialize iterable to a JSON lines formatted string.
36
+
37
+ :param Iterable[Any] iterable: Iterable of objects
38
+ :param kwargs: `json.dumps` kwargs
39
+ :rtype: str
40
+ ```
41
+
42
+ ##### dump
43
+ ```
44
+ dump(iterable, fp, **kwargs)
45
+
46
+ Serialize iterable as a JSON lines formatted stream to file-like object.
47
+
48
+ :param Iterable[Any] iterable: Iterable of objects
49
+ :param fp: file-like object
50
+ :param kwargs: `json.dumps` kwargs
51
+
52
+ Example:
53
+ import jsonl
54
+
55
+ data = ({'foo': 1}, {'bar': 2})
56
+ with open('myfile.jsonl', mode='w', encoding='utf-8') as file:
57
+ jsonl.dump(data, file)
58
+ ```
59
+
60
+
61
+ ##### dump_into
62
+ ```
63
+ dump_into(filename, iterable, encoding=utf_8, **kwargs)
64
+
65
+ Dump iterable to a JSON lines file.
66
+
67
+ Example:
68
+ import jsonl
69
+
70
+ data = ({'foo': 1}, {'bar': 2})
71
+ jsonl.dump_into("myfile.jsonl", data)
72
+ ```
73
+
74
+ ##### dump_fork
75
+ ```
76
+ dump_fork(iterable_by_path, encoding=utf_8, dump_if_empty=True, **kwargs)
77
+
78
+ Incrementally dumps different groups of elements into
79
+ the indicated JSON lines file.
80
+ ***Useful to reduce memory consumption***
81
+
82
+ :param Iterable[file_path, Iterable[dict]] iterable_by_path: Group items by file path
83
+ :param encoding: file encoding. 'utf-8' used by default
84
+ :param bool dump_if_empty: If false, don't create an empty JSON lines file.
85
+ :param kwargs: `json.dumps` kwargs
86
+
87
+ Examples:
88
+ import jsonl
89
+
90
+ path_items = (
91
+ ("num.jsonl", ({"value": 1}, {"value": 2})),
92
+ ("num.jsonl", ({"value": 3},)),
93
+ ("foo.jsonl", ({"a": "1"}, {"b": 2})),
94
+ ("baz.jsonl", ()),
95
+ )
96
+ jsonl.dump_fork(path_items)
97
+ ```
98
+
99
+ ##### load
100
+ ```
101
+ load(fp, **kwargs)
102
+
103
+ Deserialize a file-like object containing JSON Lines into a Python iterable of objects.
104
+
105
+ :param fp: file-like object
106
+ :param kwargs: `json.loads` kwargs
107
+ :rtype: Iterable[Any]
108
+ ```
109
+
110
+ ##### load_from
111
+ ```
112
+ load_from(filename, encoding=utf_8, **kwargs)
113
+
114
+ Deserialize a JSON Lines file into a Python iterable of objects.
115
+
116
+ :param filename: path
117
+ :param encoding: file encoding. 'utf-8' used by default
118
+ :param kwargs: `json.loads` kwargs
119
+ :rtype: Iterable[Any]
120
+
121
+ Examples:
122
+ import jsonl
123
+
124
+ it = jsonl.load_from("myfile.jsonl")
125
+ next(it)
126
+
127
+ ```
@@ -0,0 +1,173 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ """
4
+ Useful functions for working with JSON lines data as
5
+ described: https://jsonlines.org/
6
+
7
+ `jsonl` exposes an API similar to the `json` module from the standard library.
8
+ """
9
+
10
+ __version__ = "1.0.0"
11
# Names exported by `from jsonl import *`; each entry is listed exactly once.
__all__ = [
    "dump",
    "dumps",
    "dump_into",
    "dump_fork",
    "load",
    "load_from",
]
20
+ __title__ = "py-jsonl"
21
+
22
+ import functools
23
+ import json
24
+ import os
25
+
26
+ empty = object()
27
+ dumps_line = functools.partial(json.dumps, ensure_ascii=False)
28
+ utf_8 = "utf-8"
29
+ new_line = "\n"
30
+
31
+
32
def dumper(iterable, **kwargs):
    """
    Lazily serialize the objects of an iterable as JSON lines chunks.

    For every object two strings are yielded: its JSON encoding, then the
    line terminator.

    :param Iterable[Any] iterable: Iterable of objects
    :param kwargs: `json.dumps` kwargs
    :rtype: Iterable[str]
    """

    for item in iterable:
        yield dumps_line(item, **kwargs)
        yield new_line
39
+
40
+
41
def dumps(iterable, **kwargs):
    """
    Build a single JSON lines formatted string from an iterable of objects.

    :param Iterable[Any] iterable: Iterable of objects
    :param kwargs: `json.dumps` kwargs
    :rtype: str
    """

    chunks = dumper(iterable, **kwargs)
    return "".join(chunks)
51
+
52
+
53
def dump(iterable, fp, **kwargs):
    """
    Write an iterable of objects to a file-like object as JSON lines.

    The chunks are handed to `fp.writelines` lazily, so the iterable is
    consumed as it is written.

    :param Iterable[Any] iterable: Iterable of objects
    :param fp: file-like object
    :param kwargs: `json.dumps` kwargs

    Example:
        import jsonl

        data = ({'foo': 1}, {'bar': 2})
        with open('myfile.jsonl', mode='w', encoding='utf-8') as file:
            jsonl.dump(data, file)
    """

    chunks = dumper(iterable, **kwargs)
    fp.writelines(chunks)
70
+
71
+
72
def dump_into(filename, iterable, encoding=utf_8, **kwargs):
    """
    Write an iterable of objects to the JSON lines file at `filename`.

    The file is opened in write mode, so existing content is replaced.

    :param filename: file path
    :param Iterable[Any] iterable: Iterable of objects
    :param encoding: file encoding. 'utf-8' used by default
    :param kwargs: `json.dumps` kwargs

    Example:
        import jsonl

        data = ({'foo': 1}, {'bar': 2})
        jsonl.dump_into("myfile.jsonl", data)
    """

    with open(filename, "w", encoding=encoding) as fp:
        dump(iterable, fp, **kwargs)
85
+
86
+
87
def dump_fork(iterable_by_path, encoding=utf_8, dump_if_empty=True, **kwargs):
    """
    Incrementally dumps different groups of elements into
    the indicated JSON lines file.
    ***Useful to reduce memory consumption***

    Each path is opened in write mode the first time it appears and is kept
    open until every group has been consumed, so groups that share a path end
    up in the same file, in order of arrival.
    Items of a file are separated by a newline; unlike `dump`, no newline is
    written after the last item.
    All opened files are closed even if serializing or iterating raises.

    :param Iterable[file_path, Iterable[dict]] iterable_by_path: Group items by file path
    :param encoding: file encoding. 'utf-8' used by default
    :param bool dump_if_empty: If false, files that received no item are removed.
    :param kwargs: `json.dumps` kwargs

    Examples:
        import jsonl

        path_items = (
            ("num.jsonl", ({"value": 1}, {"value": 2})),
            ("num.jsonl", ({"value": 3},)),
            ("foo.jsonl", ({"a": "1"}, {"b": 2})),
            ("baz.jsonl", ()),
        )
        jsonl.dump_fork(path_items)
    """

    encoder = functools.partial(dumps_line, **kwargs)
    handles = {}  # path -> file object opened for that path
    started = set()  # paths for which at least one item has been seen

    try:
        for path, iterable in iterable_by_path:
            fd = handles.get(path)
            if fd is None:
                fd = handles[path] = open(path, mode="w", encoding=encoding)

            for item in iterable:
                if path in started:
                    # Separator goes *before* every item except the first one.
                    fd.write(new_line)
                else:
                    started.add(path)
                fd.write(encoder(item))
    finally:
        # Cleanup: runs on success and on failure, so no handle is leaked.
        for path, fd in handles.items():
            fd.close()
            # Remove only after closing; an open file cannot be deleted on Windows.
            if path not in started and not dump_if_empty:
                os.unlink(path)
142
+
143
+
144
def load(fp, **kwargs):
    """
    Lazily decode every line of a file-like object as a JSON document.

    Lines are decoded one at a time as the result is iterated; a line that is
    not valid JSON (including a blank line) raises `json.JSONDecodeError`.

    :param fp: file-like object
    :param kwargs: `json.loads` kwargs
    :rtype: Iterable[Any]
    """

    for line in fp:
        yield json.loads(line, **kwargs)
155
+
156
+
157
def load_from(filename, encoding=utf_8, **kwargs):
    """
    Lazily decode the JSON Lines file at `filename` into Python objects.

    This is a generator: the file is opened on the first iteration and stays
    open until the generator is exhausted or closed.

    :param filename: file path
    :param encoding: file encoding. 'utf-8' used by default
    :param kwargs: `json.loads` kwargs
    :rtype: Iterable[Any]

    Examples:
        import jsonl

        iterable = jsonl.load_from("myfile.jsonl")
    """

    with open(filename, mode="r", encoding=encoding) as fp:
        yield from load(fp, **kwargs)
@@ -0,0 +1,153 @@
1
+ Metadata-Version: 2.1
2
+ Name: py-jsonl
3
+ Version: 1.0.0
4
+ Summary: A Python Library for Handling JSON Lines Files
5
+ Home-page: https://github.com/rmoralespp/jsonl
6
+ Author: rmoralespp
7
+ Author-email: rmoralespp@gmail.com
8
+ License: MIT
9
+ Keywords: jsonlines,ndjson,jsonl
10
+ Classifier: Programming Language :: Python
11
+ Classifier: Programming Language :: Python :: 3 :: Only
12
+ Classifier: Programming Language :: Python :: 3.8
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Intended Audience :: Developers
18
+ Classifier: Intended Audience :: Information Technology
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Classifier: Topic :: Internet
21
+ Classifier: Topic :: Utilities
22
+ Classifier: Topic :: File Formats :: JSON
23
+ Requires-Python: >=3.8
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+
27
+ # jsonl
28
+
29
+ [![CI](https://github.com/rmoralespp/jsonl/workflows/CI/badge.svg)](https://github.com/rmoralespp/jsonl/actions?query=event%3Arelease+workflow%3ACI)
30
+ [![pypi](https://img.shields.io/pypi/v/jsonl.svg)](https://pypi.python.org/pypi/jsonl-py)
31
+ [![versions](https://img.shields.io/pypi/pyversions/jsonl.svg)](https://github.com/rmoralespp/jsonl)
32
+ [![codecov](https://codecov.io/gh/rmoralespp/jsonl/branch/main/graph/badge.svg)](https://app.codecov.io/gh/rmoralespp/jsonl)
33
+ [![license](https://img.shields.io/github/license/rmoralespp/jsonl.svg)](https://github.com/rmoralespp/jsonl/blob/main/LICENSE)
34
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
35
+ [![Linter: ruff](https://img.shields.io/badge/linter-_ruff-orange)](https://github.com/charliermarsh/ruff)
36
+
37
+ ### About
38
+
39
+ jsonl is a Python Library for Handling JSON Lines Files
40
+
41
+ `jsonl` exposes an API similar to the `json` module from the standard library.
42
+
43
+ ### Installation (via pip)
44
+
45
+ ```pip install py-jsonl```
46
+
47
+ ### Tests
48
+
49
+ ```
50
+ (env)$ pip install -r requirements.txt # Ignore this command if it has already been executed
51
+ (env)$ pytest tests/
52
+ (env)$ pytest --cov jsonl # Tests with coverage
53
+ ```
54
+
55
+ ### Usage
56
+
57
+ ##### dumps
58
+ ```
59
+ dumps(iterable, **kwargs)
60
+
61
+ Serialize iterable to a JSON lines formatted string.
62
+
63
+ :param Iterable[Any] iterable: Iterable of objects
64
+ :param kwargs: `json.dumps` kwargs
65
+ :rtype: str
66
+ ```
67
+
68
+ ##### dump
69
+ ```
70
+ dump(iterable, fp, **kwargs)
71
+
72
+ Serialize iterable as a JSON lines formatted stream to file-like object.
73
+
74
+ :param Iterable[Any] iterable: Iterable of objects
75
+ :param fp: file-like object
76
+ :param kwargs: `json.dumps` kwargs
77
+
78
+ Example:
79
+ import jsonl.dump
80
+
81
+ data = ({'foo': 1}, {'bar': 2})
82
+ with open('myfile.jsonl', mode='w', encoding='utf-8') as file:
83
+ jsonl.dump(data, file)
84
+ ```
85
+
86
+
87
+ ##### dump_into
88
+ ```
89
+ dump_into(filename, iterable, encoding=utf_8, **kwargs)
90
+
91
+ Dump iterable to a JSON lines file.
92
+
93
+ Example:
94
+ import jsonl.dump_into
95
+
96
+ data = ({'foo': 1}, {'bar': 2})
97
+ jsonl.dump_into("myfile.jsonl", data)
98
+ ```
99
+
100
+ ##### dump_fork
101
+ ```
102
+ dump_fork(iterable_by_path, encoding=utf_8, dump_if_empty=True, **kwargs)
103
+
104
+ Incrementally dumps different groups of elements into
105
+ the indicated JSON lines file.
106
+ ***Useful to reduce memory consumption***
107
+
108
+ :param Iterable[file_path, Iterable[dict]] iterable_by_path: Group items by file path
109
+ :param encoding: file encoding. 'utf-8' used by default
110
+ :param bool dump_if_empty: If false, don't create an empty JSON lines file.
111
+ :param kwargs: `json.dumps` kwargs
112
+
113
+ Examples:
114
+ import jsonl.dump_fork
115
+
116
+ path_items = (
117
+ ("num.jsonl", ({"value": 1}, {"value": 2})),
118
+ ("num.jsonl", ({"value": 3},)),
119
+ ("foo.jsonl", ({"a": "1"}, {"b": 2})),
120
+ ("baz.jsonl", ()),
121
+ )
122
+ jsonl.dump_fork(path_items)
123
+ ```
124
+
125
+ ##### load
126
+ ```
127
+ load(fp, **kwargs)
128
+
129
+ Deserialize a file-like object containing JSON Lines into a Python iterable of objects.
130
+
131
+ :param fp: file-like object
132
+ :param kwargs: `json.loads` kwargs
133
+ :rtype: Iterable[Any]
134
+ ```
135
+
136
+ ##### load_from
137
+ ```
138
+ def load_from(filename, encoding=utf_8, **kwargs)
139
+
140
+ Deserialize a JSON Lines file into a Python iterable of objects.
141
+
142
+ :param filename: path
143
+ :param encoding: file encoding. 'utf-8' used by default
144
+ :param kwargs: `json.loads` kwargs
145
+ :rtype: Iterable[str]
146
+
147
+ Examples:
148
+ import jsonl.load_from
149
+
150
+ it = jsonl.load_from("myfile.jsonl")
151
+ next(it)
152
+
153
+ ```
@@ -0,0 +1,16 @@
1
+ LICENSE
2
+ README.md
3
+ jsonl.py
4
+ setup.py
5
+ py_jsonl.egg-info/PKG-INFO
6
+ py_jsonl.egg-info/SOURCES.txt
7
+ py_jsonl.egg-info/dependency_links.txt
8
+ py_jsonl.egg-info/not-zip-safe
9
+ py_jsonl.egg-info/top_level.txt
10
+ tests/test_dump.py
11
+ tests/test_dump_fork.py
12
+ tests/test_dump_into.py
13
+ tests/test_dumper.py
14
+ tests/test_dumps.py
15
+ tests/test_load.py
16
+ tests/test_load_from.py
@@ -0,0 +1 @@
1
+ jsonl
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,42 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import setuptools
4
+
5
+ import jsonl
6
+
7
+
8
def read(filename):
    """Return the full text content of `filename`, decoded as UTF-8."""

    with open(filename, mode="r", encoding="utf-8") as stream:
        content = stream.read()
    return content
11
+
12
+
13
+ setuptools.setup(
14
+ name=jsonl.__title__,
15
+ version=jsonl.__version__,
16
+ description="A Python Library for Handling JSON Lines Files",
17
+ long_description=read("README.md"),
18
+ long_description_content_type="text/markdown",
19
+ classifiers=[
20
+ "Programming Language :: Python",
21
+ "Programming Language :: Python :: 3 :: Only",
22
+ "Programming Language :: Python :: 3.8",
23
+ "Programming Language :: Python :: 3.9",
24
+ "Programming Language :: Python :: 3.10",
25
+ "Programming Language :: Python :: 3.11",
26
+ "Programming Language :: Python :: 3.12",
27
+ "Intended Audience :: Developers",
28
+ "Intended Audience :: Information Technology",
29
+ "Topic :: Software Development :: Libraries :: Python Modules",
30
+ "Topic :: Internet",
31
+ "Topic :: Utilities",
32
+ "Topic :: File Formats :: JSON",
33
+ ],
34
+ keywords=["jsonlines", "ndjson", "jsonl"],
35
+ author="rmoralespp",
36
+ author_email="rmoralespp@gmail.com",
37
+ url="https://github.com/rmoralespp/jsonl",
38
+ license="MIT",
39
+ py_modules=["jsonl"],
40
+ zip_safe=False, # https://mypy.readthedocs.io/en/latest/installed_packages.html
41
+ python_requires=">=3.8",
42
+ )
@@ -0,0 +1,21 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import io
4
+
5
+ import jsonl
6
+
7
+
8
def test_dump_empty():
    """Dumping an empty iterable leaves the stream empty."""
    stream = io.StringIO()
    jsonl.dump((), stream)
    assert stream.getvalue() == ""
13
+
14
+
15
def test_dump_iter():
    """Objects coming from an iterator are written one per newline-terminated line."""
    stream = io.StringIO()
    items = iter(({"foo": 1}, {"ño": 2}))
    jsonl.dump(items, stream)
    assert stream.getvalue() == '{"foo": 1}\n{"ño": 2}\n'
@@ -0,0 +1,40 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import os
4
+ import tempfile
5
+
6
+ import pytest
7
+
8
+ import jsonl
9
+ import tests
10
+
11
+
12
def test_dumped_iter_data():
    """Groups sharing a path land in one file; an empty group still yields an empty file."""
    with tempfile.TemporaryDirectory() as tmp:
        foo_path, var_path, baz_path = (
            os.path.join(tmp, name)
            for name in ("foo.jsonl", "var.jsonl", "baz.jsonl")
        )
        groups = [
            (foo_path, ({"foo": 1}, {"ño": 2})),
            (foo_path, ({"extra": True},)),
            (var_path, ({"foo": 1}, {"ño": 2})),
            (baz_path, ()),
        ]
        jsonl.dump_fork(iter(groups))

        expected_by_path = {
            foo_path: '{"foo": 1}\n{"ño": 2}\n{"extra": true}',
            var_path: '{"foo": 1}\n{"ño": 2}',
            baz_path: "",
        }
        for path, expected in expected_by_path.items():
            assert tests.read(path) == expected
29
+
30
+
31
@pytest.mark.parametrize("dump_if_empty", (True, False))
def test_dumped_empty_data(dump_if_empty):
    """An empty group leaves an empty file only when `dump_if_empty` is true."""
    with tempfile.TemporaryDirectory() as tmp:
        target = os.path.join(tmp, "foo.jsonl")
        groups = ((target, ()),)
        jsonl.dump_fork(iter(groups), dump_if_empty=dump_if_empty)
        if not dump_if_empty:
            assert not os.path.exists(target)
        else:
            assert tests.read(target) == ""
@@ -0,0 +1,24 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import os
4
+ import tempfile
5
+
6
+ import jsonl
7
+
8
+
9
def test_exists_file():
    """`dump_into` creates the target file even when there is nothing to write."""
    with tempfile.TemporaryDirectory() as tmp:
        target = os.path.join(tmp, "foo.jsonl")
        jsonl.dump_into(target, ())
        assert os.path.exists(target)
14
+
15
+
16
def test_dumped_iter_data():
    """`dump_into` writes each object as one newline-terminated UTF-8 line."""
    items = iter(({"foo": 1}, {"ño": 2}))
    with tempfile.TemporaryDirectory() as tmp:
        target = os.path.join(tmp, "foo.jsonl")
        jsonl.dump_into(target, items)
        with open(target, encoding="utf-8") as stream:
            content = stream.read()
        assert content == '{"foo": 1}\n{"ño": 2}\n'
@@ -0,0 +1,16 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import jsonl
4
+
5
+
6
def test_empty():
    """`dumper` yields nothing for an empty iterable."""
    chunks = tuple(jsonl.dumper(()))
    assert chunks == ()
10
+
11
+
12
def test_no_empty():
    """`dumper` yields the JSON text and then a newline for every object."""
    items = iter(({"foo": 1}, {"ño": 2}))
    chunks = tuple(jsonl.dumper(items))
    assert chunks == ('{"foo": 1}', "\n", '{"ño": 2}', "\n")
@@ -0,0 +1,14 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import jsonl
4
+
5
+
6
def test_dumps_empty():
    """`dumps` of an empty iterable gives a falsy result."""
    result = jsonl.dumps(())
    assert not result
8
+
9
+
10
def test_dumps_data():
    """`dumps` joins the encoded objects, each followed by a newline."""
    items = iter(({"ño": 1}, {"foo": "var"}))
    assert jsonl.dumps(items) == '{"ño": 1}\n{"foo": "var"}\n'
@@ -0,0 +1,33 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import io
4
+ import json
5
+
6
+ import pytest
7
+
8
+ import jsonl
9
+ import tests
10
+
11
+
12
def test_invalid_lines():
    """A blank line between records raises `JSONDecodeError` once the result is consumed."""
    stream = io.StringIO("[1, 2]\n\n[3]")
    lazy_result = jsonl.load(stream)
    with pytest.raises(json.JSONDecodeError):
        tests.consume(lazy_result)
16
+
17
+
18
def test_invalid_utf8() -> None:
    """Bytes that are not valid UTF-8 raise `UnicodeDecodeError` once the result is consumed."""
    stream = io.BytesIO(b"\xff\xff")
    lazy_result = jsonl.load(stream)
    with pytest.raises(UnicodeDecodeError):
        tests.consume(lazy_result)
22
+
23
+
24
def test_load_empty():
    """Loading from an empty stream yields no objects."""
    loaded = tuple(jsonl.load(io.StringIO()))
    assert loaded == ()
27
+
28
+
29
def test_load_data():
    """Every line of the stream is decoded into its Python object, in order."""
    stream = io.StringIO('{"foo": 1}\n{"ño": 2}\n')
    loaded = tuple(jsonl.load(stream))
    assert loaded == ({"foo": 1}, {"ño": 2})
@@ -0,0 +1,35 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import os
4
+ import tempfile
5
+
6
+ import pytest
7
+
8
+ import jsonl
9
+ import tests
10
+
11
+
12
def test_load_empty():
    """`load_from` yields nothing for a file written without content."""
    with tempfile.TemporaryDirectory() as tmp:
        source = os.path.join(tmp, "foo.jsonl")
        tests.write(source)
        loaded = tuple(jsonl.load_from(source))
    assert loaded == ()
19
+
20
+
21
def test_load_data():
    """`load_from` decodes every line of the file into its Python object, in order."""
    with tempfile.TemporaryDirectory() as tmp:
        source = os.path.join(tmp, "foo.jsonl")
        tests.write(source, '{"foo": 1}\n{"ño": 2}\n')
        loaded = tuple(jsonl.load_from(source))

    assert loaded == ({"foo": 1}, {"ño": 2})
31
+
32
+
33
def test_file_not_found():
    """Consuming `load_from` for a missing path raises `FileNotFoundError`."""
    lazy_result = jsonl.load_from("jsonl.json")
    with pytest.raises(FileNotFoundError):
        tests.consume(lazy_result)
+ tests.consume(jsonl.load_from("jsonl.json"))