opticol 0.1.0a1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
+ Metadata-Version: 2.4
2
+ Name: opticol
3
+ Version: 0.1.0a1
4
+ Summary: Implementations for the various Python Collection ABCs that optimize along various axes.
5
+ Requires-Python: >=3.12
6
+ Description-Content-Type: text/markdown
7
+
8
+ ## Opticol
9
+
10
+ Optimized collections (hence *opticol*) for Python. This package provides memory optimized versions of the base Python collection types which are:
11
+
12
+ * (Mutable)Sequence
13
+ * (Mutable)Mapping
14
+ * (Mutable)Set
15
+
16
+ The insight behind the package is the following: the size of an empty set is 216 bytes (on Python 3.14) but the size of an empty object with an empty __slots__ member is only 32 bytes. Python programs that hold large datasets in memory could benefit from using these optimized collections which fully implement the respective collection ABCs, but at a fraction of the runtime memory.
17
+
18
+ So for general users these optimizations will not be worth it if the dataset being used comfortably fits in memory, but applications which currently create tens or hundreds of thousands of Python objects could dramatically lower memory usage without API changes.
19
+
20
+ ## Usage
21
+
22
+ The optimized classes could be used directly, by creating an EmptySequence directly for example, but the recommended usage is to use the collection level `project` method which tries to project a collection instance into the memory optimized variants automatically. Additionally, there is a factory interface that could be plugged in to allow for different strategies beyond the typical `project` logic.
23
+
24
+ Consider the following example:
25
+
26
+ ```
27
+ import opticol
28
+
29
+ optimized_list = opticol.seq([])  # Actually an instance of _Size0Sequence
30
+ optimized_list_single = opticol.mut_seq(("MyString",))  # Actually an instance of _Size1MutableSequence
31
+ ```
32
+
33
+ A small note that in the current implementation, optimization is only in one direction. That is, if the MutableSequence type is optimized for collections of size 0, 1, 2, 3, then once an operation pushes it past into size 4, further decreasing of the size will not restore the optimization.
@@ -0,0 +1,26 @@
1
+ ## Opticol
2
+
3
+ Optimized collections (hence *opticol*) for Python. This package provides memory optimized versions of the base Python collection types which are:
4
+
5
+ * (Mutable)Sequence
6
+ * (Mutable)Mapping
7
+ * (Mutable)Set
8
+
9
+ The insight behind the package is the following: the size of an empty set is 216 bytes (on Python 3.14) but the size of an empty object with an empty __slots__ member is only 32 bytes. Python programs that hold large datasets in memory could benefit from using these optimized collections which fully implement the respective collection ABCs, but at a fraction of the runtime memory.
10
+
11
+ So for general users these optimizations will not be worth it if the dataset being used comfortably fits in memory, but applications which currently create tens or hundreds of thousands of Python objects could dramatically lower memory usage without API changes.
12
+
13
+ ## Usage
14
+
15
+ The optimized classes could be used directly, by creating an EmptySequence directly for example, but the recommended usage is to use the collection level `project` method which tries to project a collection instance into the memory optimized variants automatically. Additionally, there is a factory interface that could be plugged in to allow for different strategies beyond the typical `project` logic.
16
+
17
+ Consider the following example:
18
+
19
+ ```
20
+ import opticol
21
+
22
+ optimized_list = opticol.seq([])  # Actually an instance of _Size0Sequence
23
+ optimized_list_single = opticol.mut_seq(("MyString",))  # Actually an instance of _Size1MutableSequence
24
+ ```
25
+
26
+ A small note that in the current implementation, optimization is only in one direction. That is, if the MutableSequence type is optimized for collections of size 0, 1, 2, 3, then once an operation pushes it past into size 4, further decreasing of the size will not restore the optimization.
@@ -0,0 +1,27 @@
1
"""Package entry points: one projection function per supported collection ABC.

Each public name below is the ``project`` function of the matching submodule.
"""

# Only names that still exist once this module has finished executing may be listed here:
# the submodule names imported below are deleted again at the bottom of the file, so listing
# them would make ``from opticol import *`` fail with AttributeError. ``projector`` is a
# submodule that is not imported here; a star-import loads it on demand.
__all__ = [
    "mapping",
    "mut_mapping",
    "mut_seq",
    "mut_set",
    "projector",
    "seq",
    "set",
]

from opticol import mutable_mapping, mutable_sequence, mutable_set, sequence
from opticol import mapping as _mapping_module
from opticol import set as _set_module

# Re-export every submodule's ``project`` under a short name. ``mapping`` and ``set``
# deliberately rebind the package attributes that previously pointed at the submodules.
mapping = _mapping_module.project
mut_mapping = mutable_mapping.project
mut_seq = mutable_sequence.project
mut_set = mutable_set.project
seq = sequence.project
set = _set_module.project

# Remove the helper module references so only the projection functions stay visible.
del _mapping_module
del mutable_mapping
del mutable_sequence
del mutable_set
del sequence
del _set_module
@@ -0,0 +1,165 @@
1
+ from abc import ABCMeta
2
+ from collections.abc import Sequence
3
+ from itertools import zip_longest
4
+ from typing import Any
5
+
6
+
7
class OptimizedMappingMeta(ABCMeta):
    """Metaclass building fixed-size, read-only mapping classes backed by ``__slots__``.

    A class created with ``internal_size=n`` stores exactly ``n`` ``(key, value)`` pairs,
    one per slot (``_item0`` .. ``_item{n-1}``), and receives ``__getitem__``, ``__iter__``,
    ``__len__`` and ``__repr__`` implementations that read those slots. For ``n > 0`` an
    ``__init__`` taking one pair per slot is generated as well.
    """

    def __new__(
        mcs,
        name: str,
        bases: tuple[type, ...],
        namespace: dict[str, Any],
        *,
        internal_size: int,
    ) -> type:
        """Create the class ``name`` with ``internal_size`` item slots.

        Raises:
            ValueError: if ``internal_size`` is negative.
        """
        if internal_size < 0:
            raise ValueError(f"{internal_size} is not a valid size for the Mapping type.")

        slots = tuple(f"_item{i}" for i in range(internal_size))
        namespace["__slots__"] = slots

        mcs._add_methods(slots, namespace, internal_size)

        return super().__new__(mcs, name, bases, namespace)

    @staticmethod
    def _add_methods(
        item_slots: Sequence[str],
        namespace: dict[str, Any],
        internal_size: int,
    ) -> None:
        """Install the slot-based mapping methods into ``namespace``."""
        if internal_size > 0:
            params = ", ".join(item_slots)
            body = "\n".join(f"    self.{slot} = {slot}" for slot in item_slots)
            # Compile into a scratch dict: exec() inserts ``__builtins__`` into the globals
            # it is given, which must not leak into the class namespace.
            scratch: dict[str, Any] = {}
            exec(f"def __init__(self, {params}):\n{body}\n", scratch)
            namespace["__init__"] = scratch["__init__"]

        def __getitem__(self, key):
            # Linear scan over the stored (key, value) pairs.
            for slot in item_slots:
                item = getattr(self, slot)
                if item[0] == key:
                    return item[1]
            raise KeyError(key)

        def __iter__(self):
            yield from (getattr(self, slot)[0] for slot in item_slots)

        def __len__(_):
            return internal_size

        def __repr__(self):
            items = [
                f"{repr(getattr(self, slot)[0])}: {repr(getattr(self, slot)[1])}"
                for slot in item_slots
            ]
            return "{" + ", ".join(items) + "}"

        namespace["__getitem__"] = __getitem__
        namespace["__iter__"] = __iter__
        namespace["__len__"] = __len__
        namespace["__repr__"] = __repr__
60
+
61
+
62
class OptimizedMutableMappingMeta(ABCMeta):
    """Metaclass building slot-backed mutable mapping classes with a dict fallback.

    Up to ``internal_size`` ``(key, value)`` pairs are stored one per slot, with unused
    slots holding ``None``. Once the mapping grows past ``internal_size`` the first slot
    holds a regular ``dict`` with all items ("overflow" mode) and the remaining slots are
    ``None``. Shrinking back to ``internal_size`` items or fewer re-packs into the slots.
    """

    def __new__(
        mcs,
        name: str,
        bases: tuple[type, ...],
        namespace: dict[str, Any],
        *,
        internal_size: int,
    ) -> type:
        """Create the class ``name`` with ``internal_size`` item slots.

        Raises:
            ValueError: if ``internal_size`` is not positive (the first slot is required
                to hold the overflow dict).
        """
        if internal_size <= 0:
            raise ValueError(f"{internal_size} is not a valid size for the MutableMapping type.")

        slots = tuple(f"_item{i}" for i in range(internal_size))
        namespace["__slots__"] = slots

        mcs._add_methods(slots, namespace, internal_size)

        return super().__new__(mcs, name, bases, namespace)

    @staticmethod
    def _add_methods(
        item_slots: Sequence[str],
        namespace: dict[str, Any],
        internal_size: int,
    ) -> None:
        """Install the slot/overflow based mapping methods into ``namespace``."""
        first_slot = item_slots[0]

        def _assign_dict(self, d):
            """Store ``d`` either packed into the slots or as the overflow dict."""
            if len(d) > internal_size:
                setattr(self, first_slot, d)
                for slot in item_slots[1:]:
                    setattr(self, slot, None)
                return

            # Pairs are tuples, so None unambiguously marks an unused slot.
            pairs = iter(d.items())
            for slot in item_slots:
                setattr(self, slot, next(pairs, None))

        def _mutate(self, operation):
            """Apply ``operation`` to a dict view of the contents and store the result.

            In overflow mode the backing dict is updated in place, so a mutation costs
            what the dict operation costs instead of a full copy per call.
            """
            first = getattr(self, first_slot)
            in_place = isinstance(first, dict)
            current = first if in_place else dict(self)
            operation(current)
            # Re-pack when coming from the slots, or when the overflow dict shrank enough.
            if not in_place or len(current) <= internal_size:
                _assign_dict(self, current)

        def __init__(self, it):
            d = it if isinstance(it, dict) else dict(it)
            if d is it and len(d) > internal_size:
                # The overflow dict is mutated in place later; never share the caller's dict.
                d = dict(d)
            _assign_dict(self, d)

        def __getitem__(self, key):
            first = getattr(self, first_slot)
            if isinstance(first, dict):
                return first[key]

            for slot in item_slots:
                item = getattr(self, slot)
                if item is None:
                    break
                if item[0] == key:
                    return item[1]

            raise KeyError(key)

        def __setitem__(self, key, value):
            _mutate(self, lambda current: current.__setitem__(key, value))

        def __delitem__(self, key):
            _mutate(self, lambda current: current.__delitem__(key))

        def __iter__(self):
            first = getattr(self, first_slot)
            if isinstance(first, dict):
                yield from first
                return

            for slot in item_slots:
                item = getattr(self, slot)
                if item is None:
                    return
                yield item[0]

        def __len__(self):
            first = getattr(self, first_slot)
            if isinstance(first, dict):
                return len(first)

            count = 0
            for slot in item_slots:
                if getattr(self, slot) is None:
                    break
                count += 1
            return count

        def __repr__(self):
            items = [f"{repr(k)}: {repr(v)}" for k, v in self.items()]
            return "{" + ", ".join(items) + "}"

        namespace["__init__"] = __init__
        namespace["__getitem__"] = __getitem__
        namespace["__setitem__"] = __setitem__
        namespace["__delitem__"] = __delitem__
        namespace["__iter__"] = __iter__
        namespace["__len__"] = __len__
        namespace["__repr__"] = __repr__
@@ -0,0 +1,13 @@
1
+ from dataclasses import dataclass
2
+ from typing import Any
3
+
4
+
5
+ class EndMarker: ...
6
+
7
+
8
+ END = EndMarker()
9
+
10
+
11
+ @dataclass(slots=True, frozen=True)
12
+ class Overflow:
13
+ data: Any
@@ -0,0 +1,179 @@
1
+ from abc import ABCMeta
2
+ from itertools import zip_longest
3
+ from typing import Any, Callable
4
+
5
+ from collections.abc import Sequence
6
+
7
+ from opticol._sentinel import END, Overflow
8
+
9
+
10
+ def _adjust_index(idx: int, length: int) -> int:
11
+ adjusted = idx if idx >= 0 else length + idx
12
+ if adjusted < 0 or adjusted >= length:
13
+ raise IndexError(f"{adjusted} is outside of the expected bounds.")
14
+ return adjusted
15
+
16
+
17
+ class OptimizedSequenceMeta(ABCMeta):
18
+ def __new__(
19
+ mcs,
20
+ name: str,
21
+ bases: tuple[type, ...],
22
+ namespace: dict[str, Any],
23
+ *,
24
+ internal_size: int,
25
+ project: Callable[[list], Sequence],
26
+ ) -> type:
27
+ slots = tuple(f"_item{i}" for i in range(internal_size))
28
+ namespace["__slots__"] = slots
29
+
30
+ mcs._add_methods(slots, namespace, internal_size, project)
31
+
32
+ return super().__new__(mcs, name, bases, namespace)
33
+
34
+ @staticmethod
35
+ def _add_methods(
36
+ item_slots: Sequence[str],
37
+ namespace: dict[str, Any],
38
+ internal_size: int,
39
+ project: Callable[[list], Sequence],
40
+ ) -> None:
41
+ if internal_size > 0:
42
+ init_ir = f"""
43
+ def __init__(self, {",".join(item_slots)}):
44
+ {"\n ".join(f"self.{slot} = {slot}" for slot in item_slots)}
45
+ """
46
+ exec(init_ir, namespace)
47
+
48
+ def __getitem__(self, key):
49
+ match key:
50
+ case int():
51
+ key = _adjust_index(key, len(self))
52
+ return getattr(self, item_slots[key])
53
+ case slice():
54
+ indices = range(*key.indices(len(self)))
55
+ return project([self[i] for i in indices])
56
+ case _:
57
+ raise TypeError(
58
+ f"Sequence accessors must be integers or slices, not {type(key)}"
59
+ )
60
+
61
+ def __len__(_):
62
+ return internal_size
63
+
64
+ def __repr__(self):
65
+ return f"[{", ".join(repr(getattr(self, slot)) for slot in item_slots)}]"
66
+
67
+ namespace["__getitem__"] = __getitem__
68
+ namespace["__len__"] = __len__
69
+ namespace["__repr__"] = __repr__
70
+
71
+
72
+ class OptimizedMutableSequenceMeta(ABCMeta):
73
+ def __new__(
74
+ mcs,
75
+ name: str,
76
+ bases: tuple[type, ...],
77
+ namespace: dict[str, Any],
78
+ *,
79
+ internal_size: int,
80
+ project: Callable[[list], Sequence],
81
+ ) -> type:
82
+ if internal_size <= 0:
83
+ raise ValueError(f"{internal_size} is not a valid size for the MutableSequence type.")
84
+
85
+ slots = tuple(f"_item{i}" for i in range(internal_size))
86
+ namespace["__slots__"] = slots
87
+
88
+ mcs._add_methods(slots, namespace, internal_size, project)
89
+
90
+ return super().__new__(mcs, name, bases, namespace)
91
+
92
+ @staticmethod
93
+ def _add_methods(
94
+ item_slots: Sequence[str],
95
+ namespace: dict[str, Any],
96
+ internal_size: int,
97
+ project: Callable[[list], Sequence],
98
+ ) -> None:
99
+ def _assign_list(self, l):
100
+ if len(l) > internal_size:
101
+ setattr(self, item_slots[0], Overflow(l))
102
+ for slot in item_slots[1:]:
103
+ setattr(self, slot, END)
104
+ else:
105
+ sentinel = object()
106
+ for slot, v in zip_longest(item_slots, l, fillvalue=sentinel):
107
+ if v is sentinel:
108
+ setattr(self, slot, END)
109
+ else:
110
+ setattr(self, slot, v)
111
+
112
+ def __init__(self, it):
113
+ collected = it if isinstance(it, list) else list(it)
114
+ _assign_list(self, collected)
115
+
116
+ def __getitem__(self, key):
117
+ first = getattr(self, item_slots[0])
118
+ overflowed = isinstance(first, Overflow)
119
+
120
+ match key:
121
+ case int():
122
+ if overflowed:
123
+ return first.data[key]
124
+
125
+ key = _adjust_index(key, len(self))
126
+ v = getattr(self, item_slots[key])
127
+ if v is END:
128
+ raise IndexError(f"{key} is outside of the expected bounds.")
129
+ return v
130
+ case slice():
131
+ if overflowed:
132
+ return project(first.data[key])
133
+
134
+ indices = range(*key.indices(len(self)))
135
+ first = getattr(self, item_slots[0])
136
+ return project([self[i] for i in indices])
137
+ case _:
138
+ raise TypeError(
139
+ f"Sequence accessors must be integers or slices, not {type(key)}"
140
+ )
141
+
142
+ def __setitem__(self, key, value):
143
+ current = list(self)
144
+ current[key] = value
145
+ _assign_list(self, current)
146
+
147
+ def __delitem__(self, key):
148
+ current = list(self)
149
+ del current[key]
150
+ _assign_list(self, current)
151
+
152
+ def __len__(self):
153
+ first = getattr(self, item_slots[0])
154
+ if isinstance(first, Overflow):
155
+ return len(first.data)
156
+
157
+ count = 0
158
+ for slot in item_slots:
159
+ if getattr(self, slot) is END:
160
+ break
161
+ count += 1
162
+
163
+ return count
164
+
165
+ def insert(self, index, value):
166
+ current = list(self)
167
+ current.insert(index, value)
168
+ _assign_list(self, current)
169
+
170
+ def __repr__(self):
171
+ return f"[{", ".join(repr(val) for val in self)}]"
172
+
173
+ namespace["__init__"] = __init__
174
+ namespace["__getitem__"] = __getitem__
175
+ namespace["__setitem__"] = __setitem__
176
+ namespace["__delitem__"] = __delitem__
177
+ namespace["__len__"] = __len__
178
+ namespace["insert"] = insert
179
+ namespace["__repr__"] = __repr__
@@ -0,0 +1,176 @@
1
+ from abc import ABCMeta
2
+ from itertools import zip_longest
3
+ from typing import Any, Callable
4
+
5
+ from collections.abc import Sequence, Set
6
+
7
+ from opticol._sentinel import END, Overflow
8
+
9
+
10
class OptimizedSetMeta(ABCMeta):
    """Metaclass building fixed-size, read-only set classes backed by ``__slots__``.

    A class created with ``internal_size=n`` stores exactly ``n`` elements, one per slot
    (``_item0`` .. ``_item{n-1}``). ``project`` is called with a built-in ``set`` to build
    the results of the set operators inherited from the ``Set`` ABC.
    """

    def __new__(
        mcs,
        name: str,
        bases: tuple[type, ...],
        namespace: dict[str, Any],
        *,
        internal_size: int,
        project: Callable[[set], Set],
    ) -> type:
        """Create the class ``name`` with ``internal_size`` item slots.

        Raises:
            ValueError: if ``internal_size`` is negative.
        """
        if internal_size < 0:
            raise ValueError(f"{internal_size} is not a valid size for the Set type.")

        slots = tuple(f"_item{i}" for i in range(internal_size))
        namespace["__slots__"] = slots

        mcs._add_methods(slots, namespace, internal_size, project)

        return super().__new__(mcs, name, bases, namespace)

    @staticmethod
    def _add_methods(
        item_slots: Sequence[str],
        namespace: dict[str, Any],
        internal_size: int,
        project: Callable[[set], Set],
    ) -> None:
        """Install the slot-based set methods into ``namespace``."""
        if internal_size > 0:
            params = ", ".join(item_slots)
            body = "\n".join(f"    self.{slot} = {slot}" for slot in item_slots)
            # Compile into a scratch dict: exec() inserts ``__builtins__`` into the globals
            # it is given, which must not leak into the class namespace.
            scratch: dict[str, Any] = {}
            exec(f"def __init__(self, {params}):\n{body}\n", scratch)
            namespace["__init__"] = scratch["__init__"]

        def __contains__(self, value):
            # Identity first, then equality, mirroring built-in set membership (this
            # matters for objects that do not compare equal to themselves, e.g. NaN).
            for slot in item_slots:
                item = getattr(self, slot)
                if item is value or item == value:
                    return True
            return False

        def __iter__(self):
            for slot in item_slots:
                yield getattr(self, slot)

        def __len__(_):
            return internal_size

        def __repr__(self):
            if internal_size == 0:
                return "set()"
            return "{" + ", ".join(repr(getattr(self, slot)) for slot in item_slots) + "}"

        def _from_iterable(_, it):
            # Hook used by the Set ABC mixins to build operator results.
            return project(set(it))

        namespace["__contains__"] = __contains__
        namespace["__iter__"] = __iter__
        namespace["__len__"] = __len__
        namespace["__repr__"] = __repr__
        namespace["_from_iterable"] = classmethod(_from_iterable)
67
+
68
+
69
class OptimizedMutableSetMeta(ABCMeta):
    """Metaclass building slot-backed mutable set classes with a built-in ``set`` fallback.

    Up to ``internal_size`` elements are stored one per slot, with unused slots holding
    ``END``. Once the set grows past ``internal_size`` the first slot holds an ``Overflow``
    wrapping a regular ``set`` with all elements and the remaining slots hold ``END``.
    Shrinking back to ``internal_size`` elements or fewer re-packs into the slots.
    ``project`` is called with a built-in ``set`` to build set-operator results.
    """

    def __new__(
        mcs,
        name: str,
        bases: tuple[type, ...],
        namespace: dict[str, Any],
        *,
        internal_size: int,
        project: Callable[[set], Set],
    ) -> type:
        """Create the class ``name`` with ``internal_size`` item slots.

        Raises:
            ValueError: if ``internal_size`` is not positive (the first slot is required
                to hold the overflow set).
        """
        if internal_size <= 0:
            raise ValueError(f"{internal_size} is not a valid size for the MutableSet type.")

        slots = tuple(f"_item{i}" for i in range(internal_size))
        namespace["__slots__"] = slots

        mcs._add_methods(slots, namespace, internal_size, project)

        return super().__new__(mcs, name, bases, namespace)

    @staticmethod
    def _add_methods(
        item_slots: Sequence[str],
        namespace: dict[str, Any],
        internal_size: int,
        project: Callable[[set], Set],
    ) -> None:
        """Install the slot/overflow based set methods into ``namespace``."""
        first_slot = item_slots[0]

        def _spilled(self):
            """Return the backing set in overflow mode, otherwise ``None``."""
            first = getattr(self, first_slot)
            return first.data if isinstance(first, Overflow) else None

        def _assign_set(self, s):
            """Store ``s`` either packed into the slots or wrapped as the overflow set."""
            if len(s) > internal_size:
                setattr(self, first_slot, Overflow(s))
                for slot in item_slots[1:]:
                    setattr(self, slot, END)
                return

            values = iter(s)
            for slot in item_slots:
                setattr(self, slot, next(values, END))

        def _mutate(self, operation):
            """Apply ``operation`` to a set view of the contents and store the result.

            In overflow mode the backing set is updated in place, so a mutation costs
            what the set operation costs instead of a full copy per call.
            """
            data = _spilled(self)
            in_place = data is not None
            if not in_place:
                data = set(self)
            operation(data)
            # Re-pack when coming from the slots, or when the overflow set shrank enough.
            if not in_place or len(data) <= internal_size:
                _assign_set(self, data)

        def __init__(self, it):
            collected = it if isinstance(it, set) else set(it)
            if collected is it and len(collected) > internal_size:
                # The overflow set is mutated in place later; never share the caller's set.
                collected = set(collected)
            _assign_set(self, collected)

        def __contains__(self, value):
            data = _spilled(self)
            if data is not None:
                return value in data

            for slot in item_slots:
                v = getattr(self, slot)
                if v is END:
                    break
                # Identity first, then equality, mirroring built-in set membership.
                if v is value or v == value:
                    return True
            return False

        def __iter__(self):
            data = _spilled(self)
            if data is not None:
                yield from data
                return

            for slot in item_slots:
                v = getattr(self, slot)
                if v is END:
                    break
                yield v

        def __len__(self):
            data = _spilled(self)
            if data is not None:
                return len(data)

            count = 0
            for slot in item_slots:
                if getattr(self, slot) is END:
                    break
                count += 1
            return count

        def add(self, value):
            _mutate(self, lambda data: data.add(value))

        def discard(self, value):
            _mutate(self, lambda data: data.discard(value))

        def __repr__(self):
            if len(self) == 0:
                return "set()"
            return "{" + ", ".join(repr(val) for val in self) + "}"

        def _from_iterable(_, it):
            # Hook used by the Set ABC mixins to build operator results.
            return project(set(it))

        namespace["__init__"] = __init__
        namespace["__contains__"] = __contains__
        namespace["__iter__"] = __iter__
        namespace["__len__"] = __len__
        namespace["add"] = add
        namespace["discard"] = discard
        namespace["__repr__"] = __repr__
        namespace["_from_iterable"] = classmethod(_from_iterable)
@@ -0,0 +1,26 @@
1
+ from collections.abc import Mapping
2
+ from opticol._mapping import OptimizedMappingMeta
3
+
4
+
5
+ def _create_mapping_class(size: int) -> type:
6
+ return OptimizedMappingMeta(f"_Size{size}Mapping", (Mapping,), {}, internal_size=size)
7
+
8
+
9
+ _by_size: list[type] = []
10
+
11
+
12
+ def project[K, V](original: Mapping[K, V]) -> Mapping[K, V]:
13
+ if len(original) >= len(_by_size):
14
+ return original
15
+
16
+ ctor = _by_size[len(original)]
17
+ items = tuple(original.items())
18
+ return ctor(*items)
19
+
20
+
21
+ _Size0Mapping = _create_mapping_class(0)
22
+ _Size1Mapping = _create_mapping_class(1)
23
+ _Size2Mapping = _create_mapping_class(2)
24
+ _Size3Mapping = _create_mapping_class(3)
25
+
26
+ _by_size.extend([_Size0Mapping, _Size1Mapping, _Size2Mapping, _Size3Mapping])
@@ -0,0 +1,31 @@
1
+ from collections.abc import MutableMapping
2
+ from opticol._mapping import OptimizedMutableMappingMeta
3
+
4
+
5
+ def _create_mut_mapping_class(size: int) -> type:
6
+ return OptimizedMutableMappingMeta(
7
+ f"_Size{size}MutableMapping",
8
+ (MutableMapping,),
9
+ {},
10
+ internal_size=size,
11
+ )
12
+
13
+
14
+ _by_size: list[type] = []
15
+
16
+
17
+ def project[K, V](original: MutableMapping[K, V]) -> MutableMapping[K, V]:
18
+ if len(original) >= len(_by_size):
19
+ return original
20
+
21
+ ctor = _by_size[len(original)]
22
+ return ctor(original)
23
+
24
+
25
+ _Size1MutableMapping = _create_mut_mapping_class(1)
26
+ _Size2MutableMapping = _create_mut_mapping_class(2)
27
+ _Size3MutableMapping = _create_mut_mapping_class(3)
28
+
29
+ _by_size.extend(
30
+ [_Size1MutableMapping, _Size1MutableMapping, _Size2MutableMapping, _Size3MutableMapping]
31
+ )
@@ -0,0 +1,32 @@
1
+ from collections.abc import MutableSequence
2
+ from opticol._sequence import OptimizedMutableSequenceMeta
3
+
4
+
5
+ def _create_mut_seq_class(size: int) -> type:
6
+ return OptimizedMutableSequenceMeta(
7
+ f"_Size{size}MutableSequence",
8
+ (MutableSequence,),
9
+ {},
10
+ internal_size=size,
11
+ project=project,
12
+ )
13
+
14
+
15
+ _by_size: list[type] = []
16
+
17
+
18
+ def project[T](original: MutableSequence[T]) -> MutableSequence[T]:
19
+ if len(original) >= len(_by_size):
20
+ return original
21
+
22
+ ctor = _by_size[len(original)]
23
+ return ctor(original)
24
+
25
+
26
+ _Size1MutableSequence = _create_mut_seq_class(1)
27
+ _Size2MutableSequence = _create_mut_seq_class(2)
28
+ _Size3MutableSequence = _create_mut_seq_class(3)
29
+
30
+ _by_size.extend(
31
+ [_Size1MutableSequence, _Size1MutableSequence, _Size2MutableSequence, _Size3MutableSequence]
32
+ )
@@ -0,0 +1,30 @@
1
+ from collections.abc import MutableSet
2
+ from opticol._set import OptimizedMutableSetMeta
3
+
4
+
5
+ def _create_mut_set_class(size: int) -> type:
6
+ return OptimizedMutableSetMeta(
7
+ f"_Size{size}MutableSet",
8
+ (MutableSet,),
9
+ {},
10
+ internal_size=size,
11
+ project=project,
12
+ )
13
+
14
+
15
+ _by_size: list[type] = []
16
+
17
+
18
+ def project[T](original: MutableSet[T]) -> MutableSet[T]:
19
+ if len(original) >= len(_by_size):
20
+ return original
21
+
22
+ ctor = _by_size[len(original)]
23
+ return ctor(original)
24
+
25
+
26
+ _Size1MutableSet = _create_mut_set_class(1)
27
+ _Size2MutableSet = _create_mut_set_class(2)
28
+ _Size3MutableSet = _create_mut_set_class(3)
29
+
30
+ _by_size.extend([_Size1MutableSet, _Size1MutableSet, _Size2MutableSet, _Size3MutableSet])
@@ -0,0 +1,64 @@
1
+ from abc import ABC, abstractmethod
2
+ from collections.abc import Mapping, MutableMapping, MutableSequence, MutableSet, Sequence, Set
3
+
4
+ import opticol
5
+
6
+
7
+ class Projector(ABC):
8
+ @abstractmethod
9
+ def seq[T](self, seq: Sequence[T]) -> Sequence[T]: ...
10
+
11
+ @abstractmethod
12
+ def mut_seq[T](self, mut_seq: MutableSequence[T]) -> MutableSequence[T]: ...
13
+
14
+ @abstractmethod
15
+ def set[T](self, s: Set[T]) -> Set[T]: ...
16
+
17
+ @abstractmethod
18
+ def mut_set[T](self, mut_set: MutableSet[T]) -> MutableSet[T]: ...
19
+
20
+ @abstractmethod
21
+ def mapping[K, V](self, mapping: Mapping[K, V]) -> Mapping[K, V]: ...
22
+
23
+ @abstractmethod
24
+ def mut_mapping[K, V](self, mut_mapping: MutableMapping[K, V]) -> MutableMapping[K, V]: ...
25
+
26
+
27
+ class PassThroughProjector(ABC):
28
+ def seq[T](self, seq: Sequence[T]) -> Sequence[T]:
29
+ return seq
30
+
31
+ def mut_seq[T](self, mut_seq: MutableSequence[T]) -> MutableSequence[T]:
32
+ return mut_seq
33
+
34
+ def set[T](self, s: Set[T]) -> Set[T]:
35
+ return s
36
+
37
+ def mut_set[T](self, mut_set: MutableSet[T]) -> MutableSet[T]:
38
+ return mut_set
39
+
40
+ def mapping[K, V](self, mapping: Mapping[K, V]) -> Mapping[K, V]:
41
+ return mapping
42
+
43
+ def mut_mapping[K, V](self, mut_mapping: MutableMapping[K, V]) -> MutableMapping[K, V]:
44
+ return mut_mapping
45
+
46
+
47
+ class DefaultOptimizingProjector(Projector):
48
+ def seq[T](self, seq: Sequence[T]) -> Sequence[T]:
49
+ return opticol.seq(seq)
50
+
51
+ def mut_seq[T](self, mut_seq: MutableSequence[T]) -> MutableSequence[T]:
52
+ return opticol.mut_seq(mut_seq)
53
+
54
+ def set[T](self, s: Set[T]) -> Set[T]:
55
+ return opticol.set(s)
56
+
57
+ def mut_set[T](self, mut_set: MutableSet[T]) -> MutableSet[T]:
58
+ return opticol.mut_set(mut_set)
59
+
60
+ def mapping[K, V](self, mapping: Mapping[K, V]) -> Mapping[K, V]:
61
+ return opticol.mapping(mapping)
62
+
63
+ def mut_mapping[K, V](self, mut_mapping: MutableMapping[K, V]) -> MutableMapping[K, V]:
64
+ return opticol.mut_mapping(mut_mapping)
File without changes
@@ -0,0 +1,27 @@
1
+ from collections.abc import Sequence
2
+ from opticol._sequence import OptimizedSequenceMeta
3
+
4
+
5
+ def _create_seq_class(size: int) -> type:
6
+ return OptimizedSequenceMeta(
7
+ f"_Size{size}Sequence", (Sequence,), {}, internal_size=size, project=project
8
+ )
9
+
10
+
11
+ _by_size: list[type] = []
12
+
13
+
14
+ def project[T](original: Sequence[T]) -> Sequence[T]:
15
+ if len(original) >= len(_by_size):
16
+ return original
17
+
18
+ ctor = _by_size[len(original)]
19
+ return ctor(*original)
20
+
21
+
22
+ _Size0Sequence = _create_seq_class(0)
23
+ _Size1Sequence = _create_seq_class(1)
24
+ _Size2Sequence = _create_seq_class(2)
25
+ _Size3Sequence = _create_seq_class(3)
26
+
27
+ _by_size.extend([_Size0Sequence, _Size1Sequence, _Size2Sequence, _Size3Sequence])
@@ -0,0 +1,25 @@
1
+ from collections.abc import Set
2
+ from opticol._set import OptimizedSetMeta
3
+
4
+
5
+ def _create_set_class(size: int) -> type:
6
+ return OptimizedSetMeta(f"_Size{size}Set", (Set,), {}, internal_size=size, project=project)
7
+
8
+
9
+ _by_size: list[type] = []
10
+
11
+
12
+ def project[T](original: Set[T]) -> Set[T]:
13
+ if len(original) >= len(_by_size):
14
+ return original
15
+
16
+ ctor = _by_size[len(original)]
17
+ return ctor(*original)
18
+
19
+
20
+ _Size0Set = _create_set_class(0)
21
+ _Size1Set = _create_set_class(1)
22
+ _Size2Set = _create_set_class(2)
23
+ _Size3Set = _create_set_class(3)
24
+
25
+ _by_size.extend([_Size0Set, _Size1Set, _Size2Set, _Size3Set])
@@ -0,0 +1,33 @@
1
+ Metadata-Version: 2.4
2
+ Name: opticol
3
+ Version: 0.1.0a1
4
+ Summary: Implementations for the various Python Collection ABCs that optimize along various axes.
5
+ Requires-Python: >=3.12
6
+ Description-Content-Type: text/markdown
7
+
8
+ ## Opticol
9
+
10
+ Optimized collections (hence *opticol*) for Python. This package provides memory optimized versions of the base Python collection types which are:
11
+
12
+ * (Mutable)Sequence
13
+ * (Mutable)Mapping
14
+ * (Mutable)Set
15
+
16
+ The insight behind the package is the following: the size of an empty set is 216 bytes (on Python 3.14) but the size of an empty object with an empty __slots__ member is only 32 bytes. Python programs that hold large datasets in memory could benefit from using these optimized collections which fully implement the respective collection ABCs, but at a fraction of the runtime memory.
17
+
18
+ So for general users these optimizations will not be worth it if the dataset being used comfortably fits in memory, but applications which currently create tens or hundreds of thousands of Python objects could dramatically lower memory usage without API changes.
19
+
20
+ ## Usage
21
+
22
+ The optimized classes could be used directly, by creating an EmptySequence directly for example, but the recommended usage is to use the collection level `project` method which tries to project a collection instance into the memory optimized variants automatically. Additionally, there is a factory interface that could be plugged in to allow for different strategies beyond the typical `project` logic.
23
+
24
+ Consider the following example:
25
+
26
+ ```
27
+ import opticol
28
+
29
+ optimized_list = opticol.seq([])  # Actually an instance of _Size0Sequence
30
+ optimized_list_single = opticol.mut_seq(("MyString",))  # Actually an instance of _Size1MutableSequence
31
+ ```
32
+
33
+ A small note that in the current implementation, optimization is only in one direction. That is, if the MutableSequence type is optimized for collections of size 0, 1, 2, 3, then once an operation pushes it past into size 4, further decreasing of the size will not restore the optimization.
@@ -0,0 +1,19 @@
1
+ README.md
2
+ pyproject.toml
3
+ opticol/__init__.py
4
+ opticol/_mapping.py
5
+ opticol/_sentinel.py
6
+ opticol/_sequence.py
7
+ opticol/_set.py
8
+ opticol/mapping.py
9
+ opticol/mutable_mapping.py
10
+ opticol/mutable_sequence.py
11
+ opticol/mutable_set.py
12
+ opticol/projector.py
13
+ opticol/py.typed
14
+ opticol/sequence.py
15
+ opticol/set.py
16
+ opticol.egg-info/PKG-INFO
17
+ opticol.egg-info/SOURCES.txt
18
+ opticol.egg-info/dependency_links.txt
19
+ opticol.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ opticol
@@ -0,0 +1,24 @@
1
+ [project]
2
+ name = "opticol"
3
+ version = "0.1.0a1"
4
+ description = "Implementations for the various Python Collection ABCs that optimize along various axes."
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = []
8
+
9
+ [tool.black]
10
+ line-length = 100
11
+ target-version = ["py312"]
12
+ preview = true
13
+
14
+ [tool.uv.sources]
15
+ guppy3 = { git = "https://github.com/zhuyifei1999/guppy3.git", rev = "da86ab98abf20c57b9ead4d653157af8baaa8efd" }
16
+
17
+ [dependency-groups]
18
+ dev = [
19
+ "black>=25.12.0",
20
+ "codespell>=2.4.1",
21
+ "guppy3",
22
+ "mypy>=1.19.0",
23
+ "pylint>=4.0.4",
24
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+