env-ssl-wrapper 0.0.1__tar.gz → 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: env-ssl-wrapper
3
- Version: 0.0.1
3
+ Version: 0.0.2
4
4
  Summary: An RL environment wrapper for learning SSL in the background
5
5
  Project-URL: Homepage, https://pypi.org/project/env-ssl-wrapper/
6
6
  Project-URL: Repository, https://codeberg.org/lucidrains/env-ssl-wrapper
@@ -34,10 +34,14 @@ Classifier: License :: OSI Approved :: MIT License
34
34
  Classifier: Programming Language :: Python :: 3.10
35
35
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
36
36
  Requires-Python: >=3.10
37
+ Requires-Dist: discrete-continuous-embed-readout
37
38
  Requires-Dist: einops>=0.8.1
38
39
  Requires-Dist: einx>=0.3.0
40
+ Requires-Dist: memmap-replay-buffer
39
41
  Requires-Dist: torch-einops-utils>=0.0.29
40
42
  Requires-Dist: torch>=2.5
43
+ Requires-Dist: x-mlps-pytorch
44
+ Requires-Dist: x-transformers
41
45
  Provides-Extra: examples
42
46
  Provides-Extra: test
43
47
  Requires-Dist: pytest; extra == 'test'
@@ -46,3 +50,63 @@ Description-Content-Type: text/markdown
46
50
  ## env-ssl-wrapper (wip)
47
51
 
48
52
  Wrappers around environments that automagically take care of providing representations learned through self-supervised learning
53
+
54
+ ## Citations
55
+
56
+ ```bibtex
57
+ @misc{schwarzer2021dataefficientreinforcementlearningselfpredictive,
58
+ title = {Data-Efficient Reinforcement Learning with Self-Predictive Representations},
59
+ author = {Max Schwarzer and Ankesh Anand and Rishab Goel and R Devon Hjelm and Aaron Courville and Philip Bachman},
60
+ year = {2021},
61
+ eprint = {2007.05929},
62
+ archivePrefix = {arXiv},
63
+ primaryClass = {cs.LG},
64
+ url = {https://arxiv.org/abs/2007.05929},
65
+ }
66
+ ```
67
+
68
+ ```bibtex
69
+ @misc{schmidt2024learningactactions,
70
+ title = {Learning to Act without Actions},
71
+ author = {Dominik Schmidt and Minqi Jiang},
72
+ year = {2024},
73
+ eprint = {2312.10812},
74
+ archivePrefix = {arXiv},
75
+ primaryClass = {cs.LG},
76
+ url = {https://arxiv.org/abs/2312.10812},
77
+ }
78
+ ```
79
+
80
+ ```bibtex
81
+ @misc{eysenbach2023contrastivelearninggoalconditionedreinforcement,
82
+ title = {Contrastive Learning as Goal-Conditioned Reinforcement Learning},
83
+ author = {Benjamin Eysenbach and Tianjun Zhang and Ruslan Salakhutdinov and Sergey Levine},
84
+ year = {2023},
85
+ eprint = {2206.07568},
86
+ archivePrefix = {arXiv},
87
+ primaryClass = {cs.LG},
88
+ url = {https://arxiv.org/abs/2206.07568},
89
+ }
90
+ ```
91
+
92
+ ```bibtex
93
+ @misc{ashlag2025stateentropyregularizationrobust,
94
+ title = {State Entropy Regularization for Robust Reinforcement Learning},
95
+ author = {Yonatan Ashlag and Uri Koren and Mirco Mutti and Esther Derman and Pierre-Luc Bacon and Shie Mannor},
96
+ year = {2025},
97
+ eprint = {2506.07085},
98
+ archivePrefix = {arXiv},
99
+ primaryClass = {cs.LG},
100
+ url = {https://arxiv.org/abs/2506.07085},
101
+ }
102
+ ```
103
+
104
+ ```bibtex
105
+ @inproceedings{park2026dual,
106
+ title = {Dual Goal Representations},
107
+ author = {Seohong Park and Deepinder Mann and Sergey Levine},
108
+ booktitle = {The Fourteenth International Conference on Learning Representations},
109
+ year = {2026},
110
+ url = {https://openreview.net/forum?id=aMKFTidLSM}
111
+ }
112
+ ```
@@ -0,0 +1,63 @@
1
+ ## env-ssl-wrapper (wip)
2
+
3
+ Wrappers around environments that automagically take care of providing representations learned through self-supervised learning
4
+
5
+ ## Citations
6
+
7
+ ```bibtex
8
+ @misc{schwarzer2021dataefficientreinforcementlearningselfpredictive,
9
+ title = {Data-Efficient Reinforcement Learning with Self-Predictive Representations},
10
+ author = {Max Schwarzer and Ankesh Anand and Rishab Goel and R Devon Hjelm and Aaron Courville and Philip Bachman},
11
+ year = {2021},
12
+ eprint = {2007.05929},
13
+ archivePrefix = {arXiv},
14
+ primaryClass = {cs.LG},
15
+ url = {https://arxiv.org/abs/2007.05929},
16
+ }
17
+ ```
18
+
19
+ ```bibtex
20
+ @misc{schmidt2024learningactactions,
21
+ title = {Learning to Act without Actions},
22
+ author = {Dominik Schmidt and Minqi Jiang},
23
+ year = {2024},
24
+ eprint = {2312.10812},
25
+ archivePrefix = {arXiv},
26
+ primaryClass = {cs.LG},
27
+ url = {https://arxiv.org/abs/2312.10812},
28
+ }
29
+ ```
30
+
31
+ ```bibtex
32
+ @misc{eysenbach2023contrastivelearninggoalconditionedreinforcement,
33
+ title = {Contrastive Learning as Goal-Conditioned Reinforcement Learning},
34
+ author = {Benjamin Eysenbach and Tianjun Zhang and Ruslan Salakhutdinov and Sergey Levine},
35
+ year = {2023},
36
+ eprint = {2206.07568},
37
+ archivePrefix = {arXiv},
38
+ primaryClass = {cs.LG},
39
+ url = {https://arxiv.org/abs/2206.07568},
40
+ }
41
+ ```
42
+
43
+ ```bibtex
44
+ @misc{ashlag2025stateentropyregularizationrobust,
45
+ title = {State Entropy Regularization for Robust Reinforcement Learning},
46
+ author = {Yonatan Ashlag and Uri Koren and Mirco Mutti and Esther Derman and Pierre-Luc Bacon and Shie Mannor},
47
+ year = {2025},
48
+ eprint = {2506.07085},
49
+ archivePrefix = {arXiv},
50
+ primaryClass = {cs.LG},
51
+ url = {https://arxiv.org/abs/2506.07085},
52
+ }
53
+ ```
54
+
55
+ ```bibtex
56
+ @inproceedings{park2026dual,
57
+ title = {Dual Goal Representations},
58
+ author = {Seohong Park and Deepinder Mann and Sergey Levine},
59
+ booktitle = {The Fourteenth International Conference on Learning Representations},
60
+ year = {2026},
61
+ url = {https://openreview.net/forum?id=aMKFTidLSM}
62
+ }
63
+ ```
@@ -0,0 +1 @@
1
+ from .image_wrapper import ImageObservationWrapper
@@ -0,0 +1,58 @@
1
+ from __future__ import annotations
2
+
3
+ import torch
4
+ import numpy as np
5
+
6
+ import gymnasium as gym
7
+ from PIL import Image
8
+ from einops import rearrange
9
+
10
+ # functions
11
+
12
def cast_tuple(t, length = 1):
    """Coerce `t` into a tuple.

    Args:
        t: A tuple, a list, or a single value.
        length: How many times a single (non-sequence) value is repeated.

    Returns:
        `t` itself when it is already a tuple, `tuple(t)` when it is a list,
        otherwise a tuple holding `t` repeated `length` times.
    """
    if isinstance(t, tuple):
        return t

    # a list such as [64, 64] is already an explicit sequence - repeating it
    # would yield a tuple of lists instead of the intended values
    if isinstance(t, list):
        return tuple(t)

    return (t,) * length
14
+
15
+ # class
16
+
17
class ImageObservationWrapper(gym.ObservationWrapper):
    """Observation wrapper that attaches a rendered frame to every observation.

    Each observation is turned into a dictionary that carries the resized
    render of the wrapped environment under `image_key`. Observations that are
    not already dictionaries are stored under the key 'state'.

    NOTE(review): `observation_space` is not modified here, so it still
    describes the wrapped environment's observations - confirm whether
    downstream code relies on it.
    """

    def __init__(
        self,
        env,
        image_size = (64, 64),
        image_key = 'image',
        resample_method = Image.BILINEAR,
        normalize = True,
        normalize_divisor = 255.0
    ):
        """
        Args:
            env: The environment to wrap.
            image_size: Target size handed to `PIL.Image.resize`, which reads
                it as (width, height). A single value is used for both sides.
            image_key: Dictionary key under which the image tensor is stored.
            resample_method: PIL resampling filter used when resizing.
            normalize: Whether to convert the image to float and divide it by
                `normalize_divisor`.
            normalize_divisor: Divisor applied when `normalize` is true.
        """
        super().__init__(env)
        self.image_size = cast_tuple(image_size, 2)
        self.image_key = image_key
        self.resample_method = resample_method
        self.normalize = normalize
        self.normalize_divisor = normalize_divisor

    def render_frame(self):
        """Render the wrapped environment and return the frame as a tensor.

        Returns:
            A tensor laid out as (1, c, h, w). It is float and divided by
            `normalize_divisor` when `normalize` is set; otherwise it keeps
            the dtype of the rendered array.

        Raises:
            ValueError: If the environment's `render()` returns `None`.
        """
        frame = self.env.render()

        # render() yields None when the environment was not created with an
        # array-producing render mode; fail here with a clear message instead
        # of an opaque error from inside PIL
        if frame is None:
            raise ValueError(
                "env.render() returned None - the environment must be created with "
                "a render mode that returns image arrays (e.g. render_mode = 'rgb_array')."
            )

        # PIL interprets the size argument as (width, height)
        img = Image.fromarray(frame).resize(self.image_size, resample = self.resample_method)
        img_tensor = torch.from_numpy(np.array(img))

        # channels-last image -> channels-first with a leading batch dimension
        img = rearrange(img_tensor, 'h w c -> 1 c h w')

        if self.normalize:
            img = img.float() / self.normalize_divisor

        return img

    def observation(self, obs):
        """Return `obs` as a dictionary extended with the rendered image.

        Args:
            obs: Observation from the wrapped environment, dictionary or not.

        Returns:
            A new dictionary holding the original observation content plus a
            (c, h, w) image tensor under `image_key`. Non-dictionary
            observations are placed under the key 'state'.

        Raises:
            ValueError: If `obs` is a dictionary that already contains
                `image_key`.
        """
        img_tensor = self.render_frame()

        # drop the leading batch dimension added by render_frame
        img_tensor = rearrange(img_tensor, '1 c h w -> c h w')

        if not isinstance(obs, dict):
            return dict(state = obs, **{self.image_key: img_tensor})

        if self.image_key in obs:
            raise ValueError(f"Key '{self.image_key}' is already present in the observation dictionary.")

        # shallow copy so the dictionary produced by the wrapped env is left untouched
        obs = dict(obs)
        obs.update({self.image_key: img_tensor})

        return obs
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "env-ssl-wrapper"
3
- version = "0.0.1"
3
+ version = "0.0.2"
4
4
  description = "An RL environment wrapper for learning SSL in the background"
5
5
  authors = [
6
6
  { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -24,10 +24,14 @@ classifiers=[
24
24
  ]
25
25
 
26
26
  dependencies = [
27
+ "discrete-continuous-embed-readout",
27
28
  "einx>=0.3.0",
28
29
  "einops>=0.8.1",
30
+ "memmap-replay-buffer",
29
31
  "torch>=2.5",
30
32
  "torch-einops-utils>=0.0.29",
33
+ "x-transformers",
34
+ "x-mlps-pytorch",
31
35
  ]
32
36
 
33
37
  [project.urls]
@@ -1,3 +0,0 @@
1
- ## env-ssl-wrapper (wip)
2
-
3
- Wrappers around environments that will take care of providing representations from self supervised learning automagically
File without changes
File without changes
File without changes