cotengrust 0.1.2__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cotengrust-0.1.3/.github/workflows/CI.yml +138 -0
- {cotengrust-0.1.2 → cotengrust-0.1.3}/Cargo.lock +1 -1
- {cotengrust-0.1.2 → cotengrust-0.1.3}/Cargo.toml +1 -1
- {cotengrust-0.1.2 → cotengrust-0.1.3}/PKG-INFO +82 -6
- {cotengrust-0.1.2 → cotengrust-0.1.3}/README.md +81 -5
- {cotengrust-0.1.2 → cotengrust-0.1.3}/pyproject.toml +1 -1
- {cotengrust-0.1.2 → cotengrust-0.1.3}/src/lib.rs +48 -7
- cotengrust-0.1.2/.github/workflows/CI.yml +0 -162
- {cotengrust-0.1.2 → cotengrust-0.1.3}/.gitignore +0 -0
- {cotengrust-0.1.2 → cotengrust-0.1.3}/LICENSE +0 -0
- {cotengrust-0.1.2 → cotengrust-0.1.3}/tests/test_cotengrust.py +0 -0
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# This file is autogenerated by maturin v1.5.1
|
|
2
|
+
# To update, run
|
|
3
|
+
#
|
|
4
|
+
# maturin generate-ci github
|
|
5
|
+
#
|
|
6
|
+
name: CI
|
|
7
|
+
|
|
8
|
+
on:
|
|
9
|
+
push:
|
|
10
|
+
branches:
|
|
11
|
+
- main
|
|
12
|
+
- master
|
|
13
|
+
tags:
|
|
14
|
+
- '*'
|
|
15
|
+
pull_request:
|
|
16
|
+
workflow_dispatch:
|
|
17
|
+
|
|
18
|
+
permissions:
|
|
19
|
+
contents: read
|
|
20
|
+
|
|
21
|
+
jobs:
|
|
22
|
+
linux:
|
|
23
|
+
runs-on: ${{ matrix.platform.runner }}
|
|
24
|
+
strategy:
|
|
25
|
+
matrix:
|
|
26
|
+
platform:
|
|
27
|
+
- runner: ubuntu-latest
|
|
28
|
+
target: x86_64
|
|
29
|
+
- runner: ubuntu-latest
|
|
30
|
+
target: x86
|
|
31
|
+
- runner: ubuntu-latest
|
|
32
|
+
target: aarch64
|
|
33
|
+
- runner: ubuntu-latest
|
|
34
|
+
target: armv7
|
|
35
|
+
- runner: ubuntu-latest
|
|
36
|
+
target: s390x
|
|
37
|
+
- runner: ubuntu-latest
|
|
38
|
+
target: ppc64le
|
|
39
|
+
steps:
|
|
40
|
+
- uses: actions/checkout@v4
|
|
41
|
+
- uses: actions/setup-python@v5
|
|
42
|
+
with:
|
|
43
|
+
python-version: '3.10'
|
|
44
|
+
- name: Build wheels
|
|
45
|
+
uses: PyO3/maturin-action@v1
|
|
46
|
+
with:
|
|
47
|
+
target: ${{ matrix.platform.target }}
|
|
48
|
+
args: --release --out dist --find-interpreter
|
|
49
|
+
sccache: 'true'
|
|
50
|
+
manylinux: auto
|
|
51
|
+
- name: Upload wheels
|
|
52
|
+
uses: actions/upload-artifact@v4
|
|
53
|
+
with:
|
|
54
|
+
name: wheels-linux-${{ matrix.platform.target }}
|
|
55
|
+
path: dist
|
|
56
|
+
|
|
57
|
+
windows:
|
|
58
|
+
runs-on: ${{ matrix.platform.runner }}
|
|
59
|
+
strategy:
|
|
60
|
+
matrix:
|
|
61
|
+
platform:
|
|
62
|
+
- runner: windows-latest
|
|
63
|
+
target: x64
|
|
64
|
+
- runner: windows-latest
|
|
65
|
+
target: x86
|
|
66
|
+
steps:
|
|
67
|
+
- uses: actions/checkout@v4
|
|
68
|
+
- uses: actions/setup-python@v5
|
|
69
|
+
with:
|
|
70
|
+
python-version: '3.10'
|
|
71
|
+
architecture: ${{ matrix.platform.target }}
|
|
72
|
+
- name: Build wheels
|
|
73
|
+
uses: PyO3/maturin-action@v1
|
|
74
|
+
with:
|
|
75
|
+
target: ${{ matrix.platform.target }}
|
|
76
|
+
args: --release --out dist --find-interpreter
|
|
77
|
+
sccache: 'true'
|
|
78
|
+
- name: Upload wheels
|
|
79
|
+
uses: actions/upload-artifact@v4
|
|
80
|
+
with:
|
|
81
|
+
name: wheels-windows-${{ matrix.platform.target }}
|
|
82
|
+
path: dist
|
|
83
|
+
|
|
84
|
+
macos:
|
|
85
|
+
runs-on: ${{ matrix.platform.runner }}
|
|
86
|
+
strategy:
|
|
87
|
+
matrix:
|
|
88
|
+
platform:
|
|
89
|
+
- runner: macos-latest
|
|
90
|
+
target: x86_64
|
|
91
|
+
- runner: macos-14
|
|
92
|
+
target: aarch64
|
|
93
|
+
steps:
|
|
94
|
+
- uses: actions/checkout@v4
|
|
95
|
+
- uses: actions/setup-python@v5
|
|
96
|
+
with:
|
|
97
|
+
python-version: '3.10'
|
|
98
|
+
- name: Build wheels
|
|
99
|
+
uses: PyO3/maturin-action@v1
|
|
100
|
+
with:
|
|
101
|
+
target: ${{ matrix.platform.target }}
|
|
102
|
+
args: --release --out dist --find-interpreter
|
|
103
|
+
sccache: 'true'
|
|
104
|
+
- name: Upload wheels
|
|
105
|
+
uses: actions/upload-artifact@v4
|
|
106
|
+
with:
|
|
107
|
+
name: wheels-macos-${{ matrix.platform.target }}
|
|
108
|
+
path: dist
|
|
109
|
+
|
|
110
|
+
sdist:
|
|
111
|
+
runs-on: ubuntu-latest
|
|
112
|
+
steps:
|
|
113
|
+
- uses: actions/checkout@v4
|
|
114
|
+
- name: Build sdist
|
|
115
|
+
uses: PyO3/maturin-action@v1
|
|
116
|
+
with:
|
|
117
|
+
command: sdist
|
|
118
|
+
args: --out dist
|
|
119
|
+
- name: Upload sdist
|
|
120
|
+
uses: actions/upload-artifact@v4
|
|
121
|
+
with:
|
|
122
|
+
name: wheels-sdist
|
|
123
|
+
path: dist
|
|
124
|
+
|
|
125
|
+
release:
|
|
126
|
+
name: Release
|
|
127
|
+
runs-on: ubuntu-latest
|
|
128
|
+
if: startsWith(github.ref, 'refs/tags/')
|
|
129
|
+
needs: [linux, windows, macos, sdist]
|
|
130
|
+
steps:
|
|
131
|
+
- uses: actions/download-artifact@v4
|
|
132
|
+
- name: Publish to PyPI
|
|
133
|
+
uses: PyO3/maturin-action@v1
|
|
134
|
+
env:
|
|
135
|
+
MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
|
|
136
|
+
with:
|
|
137
|
+
command: upload
|
|
138
|
+
args: --non-interactive --skip-existing wheels-*/*
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: cotengrust
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Classifier: Programming Language :: Rust
|
|
5
5
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
6
6
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
@@ -19,9 +19,14 @@ are:
|
|
|
19
19
|
- `optimize_optimal(inputs, output, size_dict, **kwargs)`
|
|
20
20
|
- `optimize_greedy(inputs, output, size_dict, **kwargs)`
|
|
21
21
|
|
|
22
|
-
The optimal algorithm is an optimized version of the `opt_einsum` 'dp'
|
|
22
|
+
The optimal algorithm is an optimized version of the `opt_einsum` 'dp'
|
|
23
23
|
path - itself an implementation of https://arxiv.org/abs/1304.6112.
|
|
24
24
|
|
|
25
|
+
There is also a variant of the greedy algorithm, which runs `ntrials` of greedy,
|
|
26
|
+
randomized paths and computes and reports the flops cost (log10) simultaneously:
|
|
27
|
+
|
|
28
|
+
- `optimize_random_greedy_track_flops(inputs, output, size_dict, **kwargs)`
|
|
29
|
+
|
|
25
30
|
|
|
26
31
|
## Installation
|
|
27
32
|
|
|
@@ -32,7 +37,7 @@ path - itself an implementation of https://arxiv.org/abs/1304.6112.
|
|
|
32
37
|
pip install cotengrust
|
|
33
38
|
```
|
|
34
39
|
|
|
35
|
-
or if you want to develop locally (which requires [pyo3](https://github.com/PyO3/pyo3)
|
|
40
|
+
or if you want to develop locally (which requires [pyo3](https://github.com/PyO3/pyo3)
|
|
36
41
|
and [maturin](https://github.com/PyO3/maturin)):
|
|
37
42
|
|
|
38
43
|
```bash
|
|
@@ -46,8 +51,8 @@ maturin develop --release
|
|
|
46
51
|
## Usage
|
|
47
52
|
|
|
48
53
|
If `cotengrust` is installed, then by default `cotengra` will use it for its
|
|
49
|
-
greedy and optimal subroutines, notably subtree
|
|
50
|
-
call the routines directly:
|
|
54
|
+
greedy, random-greedy, and optimal subroutines, notably subtree
|
|
55
|
+
reconfiguration. You can also call the routines directly:
|
|
51
56
|
|
|
52
57
|
```python
|
|
53
58
|
import cotengra as ctg
|
|
@@ -171,7 +176,7 @@ def optimize_greedy(
|
|
|
171
176
|
When assessing local greedy scores how much to weight the size of the
|
|
172
177
|
tensors removed compared to the size of the tensor added::
|
|
173
178
|
|
|
174
|
-
score = size_ab
|
|
179
|
+
score = size_ab / costmod - (size_a + size_b) * costmod
|
|
175
180
|
|
|
176
181
|
This can be a useful hyper-parameter to tune.
|
|
177
182
|
temperature : float, optional
|
|
@@ -237,6 +242,77 @@ def optimize_simplify(
|
|
|
237
242
|
"""
|
|
238
243
|
...
|
|
239
244
|
|
|
245
|
+
def optimize_random_greedy_track_flops(
|
|
246
|
+
inputs,
|
|
247
|
+
output,
|
|
248
|
+
size_dict,
|
|
249
|
+
ntrials=1,
|
|
250
|
+
costmod=(0.1, 4.0),
|
|
251
|
+
temperature=(0.001, 1.0),
|
|
252
|
+
seed=None,
|
|
253
|
+
simplify=True,
|
|
254
|
+
use_ssa=False,
|
|
255
|
+
):
|
|
256
|
+
"""Perform a batch of random greedy optimizations, simultaneously tracking
|
|
257
|
+
the best contraction path in terms of flops, so as to avoid constructing a
|
|
258
|
+
separate contraction tree.
|
|
259
|
+
|
|
260
|
+
Parameters
|
|
261
|
+
----------
|
|
262
|
+
inputs : tuple[tuple[str]]
|
|
263
|
+
The indices of each input tensor.
|
|
264
|
+
output : tuple[str]
|
|
265
|
+
The indices of the output tensor.
|
|
266
|
+
size_dict : dict[str, int]
|
|
267
|
+
A dictionary mapping indices to their dimension.
|
|
268
|
+
ntrials : int, optional
|
|
269
|
+
The number of random greedy trials to perform. The default is 1.
|
|
270
|
+
costmod : (float, float), optional
|
|
271
|
+
When assessing local greedy scores how much to weight the size of the
|
|
272
|
+
tensors removed compared to the size of the tensor added::
|
|
273
|
+
|
|
274
|
+
score = size_ab / costmod - (size_a + size_b) * costmod
|
|
275
|
+
|
|
276
|
+
It is sampled uniformly from the given range.
|
|
277
|
+
temperature : (float, float), optional
|
|
278
|
+
When assessing local greedy scores, how much to randomly perturb the
|
|
279
|
+
score. This is implemented as::
|
|
280
|
+
|
|
281
|
+
score -> sign(score) * log(|score|) - temperature * gumbel()
|
|
282
|
+
|
|
283
|
+
which implements boltzmann sampling. It is sampled log-uniformly from
|
|
284
|
+
the given range.
|
|
285
|
+
seed : int, optional
|
|
286
|
+
The seed for the random number generator.
|
|
287
|
+
simplify : bool, optional
|
|
288
|
+
Whether to perform simplifications before optimizing. These are:
|
|
289
|
+
|
|
290
|
+
- ignore any indices that appear in all terms
|
|
291
|
+
- combine any repeated indices within a single term
|
|
292
|
+
- reduce any non-output indices that only appear on a single term
|
|
293
|
+
- combine any scalar terms
|
|
294
|
+
- combine any tensors with matching indices (hadamard products)
|
|
295
|
+
|
|
296
|
+
Such simplifications may be required in the general case for the proper
|
|
297
|
+
functioning of the core optimization, but may be skipped if the input
|
|
298
|
+
indices are already in a simplified form.
|
|
299
|
+
use_ssa : bool, optional
|
|
300
|
+
Whether to return the contraction path in 'single static assignment'
|
|
301
|
+
(SSA) format (i.e. as if each intermediate is appended to the list of
|
|
302
|
+
inputs, without removals). This can be quicker and easier to work with
|
|
303
|
+
than the 'linear recycled' format that `numpy` and `opt_einsum` use.
|
|
304
|
+
|
|
305
|
+
Returns
|
|
306
|
+
-------
|
|
307
|
+
path : list[list[int]]
|
|
308
|
+
The best contraction path, given as a sequence of pairs of node
|
|
309
|
+
indices.
|
|
310
|
+
flops : float
|
|
311
|
+
The flops (/ contraction cost / number of multiplications), of the best
|
|
312
|
+
contraction path, given log10.
|
|
313
|
+
"""
|
|
314
|
+
...
|
|
315
|
+
|
|
240
316
|
def ssa_to_linear(ssa_path, n=None):
|
|
241
317
|
"""Convert a SSA path to linear format."""
|
|
242
318
|
...
|
|
@@ -7,9 +7,14 @@ are:
|
|
|
7
7
|
- `optimize_optimal(inputs, output, size_dict, **kwargs)`
|
|
8
8
|
- `optimize_greedy(inputs, output, size_dict, **kwargs)`
|
|
9
9
|
|
|
10
|
-
The optimal algorithm is an optimized version of the `opt_einsum` 'dp'
|
|
10
|
+
The optimal algorithm is an optimized version of the `opt_einsum` 'dp'
|
|
11
11
|
path - itself an implementation of https://arxiv.org/abs/1304.6112.
|
|
12
12
|
|
|
13
|
+
There is also a variant of the greedy algorithm, which runs `ntrials` of greedy,
|
|
14
|
+
randomized paths and computes and reports the flops cost (log10) simultaneously:
|
|
15
|
+
|
|
16
|
+
- `optimize_random_greedy_track_flops(inputs, output, size_dict, **kwargs)`
|
|
17
|
+
|
|
13
18
|
|
|
14
19
|
## Installation
|
|
15
20
|
|
|
@@ -20,7 +25,7 @@ path - itself an implementation of https://arxiv.org/abs/1304.6112.
|
|
|
20
25
|
pip install cotengrust
|
|
21
26
|
```
|
|
22
27
|
|
|
23
|
-
or if you want to develop locally (which requires [pyo3](https://github.com/PyO3/pyo3)
|
|
28
|
+
or if you want to develop locally (which requires [pyo3](https://github.com/PyO3/pyo3)
|
|
24
29
|
and [maturin](https://github.com/PyO3/maturin)):
|
|
25
30
|
|
|
26
31
|
```bash
|
|
@@ -34,8 +39,8 @@ maturin develop --release
|
|
|
34
39
|
## Usage
|
|
35
40
|
|
|
36
41
|
If `cotengrust` is installed, then by default `cotengra` will use it for its
|
|
37
|
-
greedy and optimal subroutines, notably subtree
|
|
38
|
-
call the routines directly:
|
|
42
|
+
greedy, random-greedy, and optimal subroutines, notably subtree
|
|
43
|
+
reconfiguration. You can also call the routines directly:
|
|
39
44
|
|
|
40
45
|
```python
|
|
41
46
|
import cotengra as ctg
|
|
@@ -159,7 +164,7 @@ def optimize_greedy(
|
|
|
159
164
|
When assessing local greedy scores how much to weight the size of the
|
|
160
165
|
tensors removed compared to the size of the tensor added::
|
|
161
166
|
|
|
162
|
-
score = size_ab
|
|
167
|
+
score = size_ab / costmod - (size_a + size_b) * costmod
|
|
163
168
|
|
|
164
169
|
This can be a useful hyper-parameter to tune.
|
|
165
170
|
temperature : float, optional
|
|
@@ -225,6 +230,77 @@ def optimize_simplify(
|
|
|
225
230
|
"""
|
|
226
231
|
...
|
|
227
232
|
|
|
233
|
+
def optimize_random_greedy_track_flops(
|
|
234
|
+
inputs,
|
|
235
|
+
output,
|
|
236
|
+
size_dict,
|
|
237
|
+
ntrials=1,
|
|
238
|
+
costmod=(0.1, 4.0),
|
|
239
|
+
temperature=(0.001, 1.0),
|
|
240
|
+
seed=None,
|
|
241
|
+
simplify=True,
|
|
242
|
+
use_ssa=False,
|
|
243
|
+
):
|
|
244
|
+
"""Perform a batch of random greedy optimizations, simultaneously tracking
|
|
245
|
+
the best contraction path in terms of flops, so as to avoid constructing a
|
|
246
|
+
separate contraction tree.
|
|
247
|
+
|
|
248
|
+
Parameters
|
|
249
|
+
----------
|
|
250
|
+
inputs : tuple[tuple[str]]
|
|
251
|
+
The indices of each input tensor.
|
|
252
|
+
output : tuple[str]
|
|
253
|
+
The indices of the output tensor.
|
|
254
|
+
size_dict : dict[str, int]
|
|
255
|
+
A dictionary mapping indices to their dimension.
|
|
256
|
+
ntrials : int, optional
|
|
257
|
+
The number of random greedy trials to perform. The default is 1.
|
|
258
|
+
costmod : (float, float), optional
|
|
259
|
+
When assessing local greedy scores how much to weight the size of the
|
|
260
|
+
tensors removed compared to the size of the tensor added::
|
|
261
|
+
|
|
262
|
+
score = size_ab / costmod - (size_a + size_b) * costmod
|
|
263
|
+
|
|
264
|
+
It is sampled uniformly from the given range.
|
|
265
|
+
temperature : (float, float), optional
|
|
266
|
+
When assessing local greedy scores, how much to randomly perturb the
|
|
267
|
+
score. This is implemented as::
|
|
268
|
+
|
|
269
|
+
score -> sign(score) * log(|score|) - temperature * gumbel()
|
|
270
|
+
|
|
271
|
+
which implements boltzmann sampling. It is sampled log-uniformly from
|
|
272
|
+
the given range.
|
|
273
|
+
seed : int, optional
|
|
274
|
+
The seed for the random number generator.
|
|
275
|
+
simplify : bool, optional
|
|
276
|
+
Whether to perform simplifications before optimizing. These are:
|
|
277
|
+
|
|
278
|
+
- ignore any indices that appear in all terms
|
|
279
|
+
- combine any repeated indices within a single term
|
|
280
|
+
- reduce any non-output indices that only appear on a single term
|
|
281
|
+
- combine any scalar terms
|
|
282
|
+
- combine any tensors with matching indices (hadamard products)
|
|
283
|
+
|
|
284
|
+
Such simplifications may be required in the general case for the proper
|
|
285
|
+
functioning of the core optimization, but may be skipped if the input
|
|
286
|
+
indices are already in a simplified form.
|
|
287
|
+
use_ssa : bool, optional
|
|
288
|
+
Whether to return the contraction path in 'single static assignment'
|
|
289
|
+
(SSA) format (i.e. as if each intermediate is appended to the list of
|
|
290
|
+
inputs, without removals). This can be quicker and easier to work with
|
|
291
|
+
than the 'linear recycled' format that `numpy` and `opt_einsum` use.
|
|
292
|
+
|
|
293
|
+
Returns
|
|
294
|
+
-------
|
|
295
|
+
path : list[list[int]]
|
|
296
|
+
The best contraction path, given as a sequence of pairs of node
|
|
297
|
+
indices.
|
|
298
|
+
flops : float
|
|
299
|
+
The flops (/ contraction cost / number of multiplications), of the best
|
|
300
|
+
contraction path, given log10.
|
|
301
|
+
"""
|
|
302
|
+
...
|
|
303
|
+
|
|
228
304
|
def ssa_to_linear(ssa_path, n=None):
|
|
229
305
|
"""Convert a SSA path to linear format."""
|
|
230
306
|
...
|
|
@@ -34,6 +34,7 @@ struct ContractionProcessor {
|
|
|
34
34
|
ssa_path: SSAPath,
|
|
35
35
|
track_flops: bool,
|
|
36
36
|
flops: Score,
|
|
37
|
+
flops_limit: Score,
|
|
37
38
|
}
|
|
38
39
|
|
|
39
40
|
/// given log(x) and log(y) compute log(x + y), without exponentiating both
|
|
@@ -195,6 +196,7 @@ impl ContractionProcessor {
|
|
|
195
196
|
let ssa = nodes.len() as Node;
|
|
196
197
|
let ssa_path: SSAPath = Vec::with_capacity(2 * ssa as usize - 1);
|
|
197
198
|
let flops: Score = 0.0;
|
|
199
|
+
let flops_limit: Score = Score::INFINITY;
|
|
198
200
|
|
|
199
201
|
ContractionProcessor {
|
|
200
202
|
nodes,
|
|
@@ -205,6 +207,7 @@ impl ContractionProcessor {
|
|
|
205
207
|
ssa_path,
|
|
206
208
|
track_flops,
|
|
207
209
|
flops,
|
|
210
|
+
flops_limit,
|
|
208
211
|
}
|
|
209
212
|
}
|
|
210
213
|
|
|
@@ -415,7 +418,7 @@ impl ContractionProcessor {
|
|
|
415
418
|
costmod: Option<f32>,
|
|
416
419
|
temperature: Option<f32>,
|
|
417
420
|
seed: Option<u64>,
|
|
418
|
-
) {
|
|
421
|
+
) -> bool {
|
|
419
422
|
let coeff_t = temperature.unwrap_or(0.0);
|
|
420
423
|
let log_coeff_a = f32::ln(costmod.unwrap_or(1.0));
|
|
421
424
|
|
|
@@ -435,7 +438,7 @@ impl ContractionProcessor {
|
|
|
435
438
|
} else {
|
|
436
439
|
0.0 as f32
|
|
437
440
|
};
|
|
438
|
-
logsub(sab
|
|
441
|
+
logsub(sab - log_coeff_a, logadd(sa, sb) + log_coeff_a) - gumbel
|
|
439
442
|
};
|
|
440
443
|
|
|
441
444
|
// cache all current nodes sizes as we go
|
|
@@ -483,6 +486,12 @@ impl ContractionProcessor {
|
|
|
483
486
|
|
|
484
487
|
// perform contraction:
|
|
485
488
|
let k = self.contract_nodes_given_legs(i, j, klegs.clone());
|
|
489
|
+
|
|
490
|
+
if self.track_flops && self.flops >= self.flops_limit {
|
|
491
|
+
// stop if we have reached the flops limit
|
|
492
|
+
return false;
|
|
493
|
+
}
|
|
494
|
+
|
|
486
495
|
node_sizes.insert(k, ksize);
|
|
487
496
|
|
|
488
497
|
for l in self.neighbors(k) {
|
|
@@ -498,6 +507,8 @@ impl ContractionProcessor {
|
|
|
498
507
|
c -= 1;
|
|
499
508
|
}
|
|
500
509
|
}
|
|
510
|
+
// success
|
|
511
|
+
return true;
|
|
501
512
|
}
|
|
502
513
|
|
|
503
514
|
/// Optimize the contraction order of all terms using a greedy algorithm
|
|
@@ -945,14 +956,23 @@ fn optimize_random_greedy_track_flops(
|
|
|
945
956
|
output: Vec<char>,
|
|
946
957
|
size_dict: Dict<char, f32>,
|
|
947
958
|
ntrials: usize,
|
|
948
|
-
costmod: Option<f32>,
|
|
949
|
-
temperature: Option<f32>,
|
|
959
|
+
costmod: Option<(f32, f32)>,
|
|
960
|
+
temperature: Option<(f32, f32)>,
|
|
950
961
|
seed: Option<u64>,
|
|
951
962
|
simplify: Option<bool>,
|
|
952
963
|
use_ssa: Option<bool>,
|
|
953
964
|
) -> (Vec<Vec<Node>>, Score) {
|
|
954
965
|
py.allow_threads(|| {
|
|
955
|
-
let
|
|
966
|
+
let (costmod_min, costmod_max) = costmod.unwrap_or((0.1, 4.0));
|
|
967
|
+
let costmod_diff = (costmod_max - costmod_min).abs();
|
|
968
|
+
let is_const_costmod = costmod_diff < Score::EPSILON;
|
|
969
|
+
|
|
970
|
+
let (temp_min, temp_max) = temperature.unwrap_or((0.001, 1.0));
|
|
971
|
+
let log_temp_min = Score::ln(temp_min);
|
|
972
|
+
let log_temp_max = Score::ln(temp_max);
|
|
973
|
+
let log_temp_diff = (log_temp_max - log_temp_min).abs();
|
|
974
|
+
let is_const_temp = log_temp_diff < Score::EPSILON;
|
|
975
|
+
|
|
956
976
|
let mut rng = match seed {
|
|
957
977
|
Some(seed) => rand::rngs::StdRng::seed_from_u64(seed),
|
|
958
978
|
None => rand::rngs::StdRng::from_entropy(),
|
|
@@ -971,14 +991,35 @@ fn optimize_random_greedy_track_flops(
|
|
|
971
991
|
|
|
972
992
|
for seed in seeds {
|
|
973
993
|
let mut cp = cp0.clone();
|
|
994
|
+
|
|
995
|
+
// uniform sample for costmod
|
|
996
|
+
let costmod = if is_const_costmod {
|
|
997
|
+
costmod_min
|
|
998
|
+
} else {
|
|
999
|
+
costmod_min + rng.gen::<f32>() * costmod_diff
|
|
1000
|
+
};
|
|
1001
|
+
|
|
1002
|
+
// log-uniform sample for temperature
|
|
1003
|
+
let temperature = if is_const_temp {
|
|
1004
|
+
temp_min
|
|
1005
|
+
} else {
|
|
1006
|
+
f32::exp(log_temp_min + rng.gen::<f32>() * log_temp_diff)
|
|
1007
|
+
};
|
|
1008
|
+
|
|
974
1009
|
// greedily contract each connected subgraph
|
|
975
|
-
cp.optimize_greedy(costmod, Some(temperature), Some(seed));
|
|
1010
|
+
let success = cp.optimize_greedy(Some(costmod), Some(temperature), Some(seed));
|
|
1011
|
+
|
|
1012
|
+
if !success {
|
|
1013
|
+
continue;
|
|
1014
|
+
}
|
|
1015
|
+
|
|
976
1016
|
// optimize any remaining disconnected terms
|
|
977
1017
|
cp.optimize_remaining_by_size();
|
|
978
1018
|
|
|
979
1019
|
if cp.flops < best_flops {
|
|
980
|
-
best_flops = cp.flops;
|
|
981
1020
|
best_path = Some(cp.ssa_path);
|
|
1021
|
+
best_flops = cp.flops;
|
|
1022
|
+
cp0.flops_limit = cp.flops;
|
|
982
1023
|
}
|
|
983
1024
|
}
|
|
984
1025
|
|
|
@@ -1,162 +0,0 @@
|
|
|
1
|
-
# This file is autogenerated by maturin v1.2.3
|
|
2
|
-
# To update, run
|
|
3
|
-
#
|
|
4
|
-
# maturin generate-ci github --pytest
|
|
5
|
-
#
|
|
6
|
-
name: CI
|
|
7
|
-
|
|
8
|
-
on:
|
|
9
|
-
push:
|
|
10
|
-
branches:
|
|
11
|
-
- main
|
|
12
|
-
- master
|
|
13
|
-
tags:
|
|
14
|
-
- '*'
|
|
15
|
-
pull_request:
|
|
16
|
-
workflow_dispatch:
|
|
17
|
-
|
|
18
|
-
permissions:
|
|
19
|
-
contents: read
|
|
20
|
-
|
|
21
|
-
jobs:
|
|
22
|
-
linux:
|
|
23
|
-
runs-on: ubuntu-latest
|
|
24
|
-
strategy:
|
|
25
|
-
matrix:
|
|
26
|
-
target: [x86_64, x86, aarch64, armv7, s390x, ppc64le]
|
|
27
|
-
steps:
|
|
28
|
-
- uses: actions/checkout@v3
|
|
29
|
-
- uses: actions/setup-python@v4
|
|
30
|
-
with:
|
|
31
|
-
python-version: '3.10'
|
|
32
|
-
- name: Build wheels
|
|
33
|
-
uses: PyO3/maturin-action@v1
|
|
34
|
-
with:
|
|
35
|
-
target: ${{ matrix.target }}
|
|
36
|
-
args: --release --out dist --find-interpreter
|
|
37
|
-
sccache: 'true'
|
|
38
|
-
manylinux: auto
|
|
39
|
-
- name: Upload wheels
|
|
40
|
-
uses: actions/upload-artifact@v3
|
|
41
|
-
with:
|
|
42
|
-
name: wheels
|
|
43
|
-
path: dist
|
|
44
|
-
- name: pytest
|
|
45
|
-
if: ${{ startsWith(matrix.target, 'x86_64') }}
|
|
46
|
-
shell: bash
|
|
47
|
-
run: |
|
|
48
|
-
set -e
|
|
49
|
-
ls dist/*
|
|
50
|
-
pip install cotengrust --find-links dist --force-reinstall
|
|
51
|
-
pip install pytest numpy cotengra
|
|
52
|
-
pytest --verbose
|
|
53
|
-
- name: pytest
|
|
54
|
-
if: ${{ !startsWith(matrix.target, 'x86') && matrix.target != 'ppc64' }}
|
|
55
|
-
uses: uraimo/run-on-arch-action@v2.5.0
|
|
56
|
-
with:
|
|
57
|
-
arch: ${{ matrix.target }}
|
|
58
|
-
distro: ubuntu22.04
|
|
59
|
-
githubToken: ${{ github.token }}
|
|
60
|
-
install: |
|
|
61
|
-
apt-get update
|
|
62
|
-
apt-get install -y --no-install-recommends python3 python3-pip
|
|
63
|
-
pip3 install -U pip pytest # numpy cotengra
|
|
64
|
-
run: |
|
|
65
|
-
set -e
|
|
66
|
-
pip3 install cotengrust --find-links dist --force-reinstall
|
|
67
|
-
pytest --verbose
|
|
68
|
-
|
|
69
|
-
windows:
|
|
70
|
-
runs-on: windows-latest
|
|
71
|
-
strategy:
|
|
72
|
-
matrix:
|
|
73
|
-
target: [x64, x86]
|
|
74
|
-
steps:
|
|
75
|
-
- uses: actions/checkout@v3
|
|
76
|
-
- uses: actions/setup-python@v4
|
|
77
|
-
with:
|
|
78
|
-
python-version: '3.10'
|
|
79
|
-
architecture: ${{ matrix.target }}
|
|
80
|
-
- name: Build wheels
|
|
81
|
-
uses: PyO3/maturin-action@v1
|
|
82
|
-
with:
|
|
83
|
-
target: ${{ matrix.target }}
|
|
84
|
-
args: --release --out dist --find-interpreter
|
|
85
|
-
sccache: 'true'
|
|
86
|
-
- name: Upload wheels
|
|
87
|
-
uses: actions/upload-artifact@v3
|
|
88
|
-
with:
|
|
89
|
-
name: wheels
|
|
90
|
-
path: dist
|
|
91
|
-
- name: pytest
|
|
92
|
-
if: ${{ !startsWith(matrix.target, 'aarch64') }}
|
|
93
|
-
shell: bash
|
|
94
|
-
run: |
|
|
95
|
-
set -e
|
|
96
|
-
ls dist/*
|
|
97
|
-
pip install cotengrust --find-links dist --force-reinstall
|
|
98
|
-
pip install pytest numpy cotengra
|
|
99
|
-
pytest --verbose
|
|
100
|
-
|
|
101
|
-
macos:
|
|
102
|
-
runs-on: macos-latest
|
|
103
|
-
strategy:
|
|
104
|
-
matrix:
|
|
105
|
-
target: [x86_64, aarch64]
|
|
106
|
-
steps:
|
|
107
|
-
- uses: actions/checkout@v3
|
|
108
|
-
- uses: actions/setup-python@v4
|
|
109
|
-
with:
|
|
110
|
-
python-version: '3.10'
|
|
111
|
-
- name: Build wheels
|
|
112
|
-
uses: PyO3/maturin-action@v1
|
|
113
|
-
with:
|
|
114
|
-
target: ${{ matrix.target }}
|
|
115
|
-
args: --release --out dist --find-interpreter
|
|
116
|
-
sccache: 'true'
|
|
117
|
-
- name: Upload wheels
|
|
118
|
-
uses: actions/upload-artifact@v3
|
|
119
|
-
with:
|
|
120
|
-
name: wheels
|
|
121
|
-
path: dist
|
|
122
|
-
- name: pytest
|
|
123
|
-
if: ${{ !startsWith(matrix.target, 'aarch64') }}
|
|
124
|
-
shell: bash
|
|
125
|
-
run: |
|
|
126
|
-
set -e
|
|
127
|
-
ls dist/*
|
|
128
|
-
pip install cotengrust --find-links dist --force-reinstall
|
|
129
|
-
pip install pytest numpy cotengra
|
|
130
|
-
pytest --verbose
|
|
131
|
-
|
|
132
|
-
sdist:
|
|
133
|
-
runs-on: ubuntu-latest
|
|
134
|
-
steps:
|
|
135
|
-
- uses: actions/checkout@v3
|
|
136
|
-
- name: Build sdist
|
|
137
|
-
uses: PyO3/maturin-action@v1
|
|
138
|
-
with:
|
|
139
|
-
command: sdist
|
|
140
|
-
args: --out dist
|
|
141
|
-
- name: Upload sdist
|
|
142
|
-
uses: actions/upload-artifact@v3
|
|
143
|
-
with:
|
|
144
|
-
name: wheels
|
|
145
|
-
path: dist
|
|
146
|
-
|
|
147
|
-
release:
|
|
148
|
-
name: Release
|
|
149
|
-
runs-on: ubuntu-latest
|
|
150
|
-
if: startsWith(github.ref, 'refs/tags/')
|
|
151
|
-
needs: [linux, windows, macos, sdist]
|
|
152
|
-
steps:
|
|
153
|
-
- uses: actions/download-artifact@v3
|
|
154
|
-
with:
|
|
155
|
-
name: wheels
|
|
156
|
-
- name: Publish to PyPI
|
|
157
|
-
uses: PyO3/maturin-action@v1
|
|
158
|
-
env:
|
|
159
|
-
MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
|
|
160
|
-
with:
|
|
161
|
-
command: upload
|
|
162
|
-
args: --non-interactive --skip-existing *
|
|
File without changes
|
|
File without changes
|
|
File without changes
|