maxframe-1.0.0rc1-cp37-cp37m-win32.whl → maxframe-1.0.0rc2-cp37-cp37m-win32.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

Potentially problematic release: this version of maxframe might be problematic.

Files changed (64)
  1. maxframe/_utils.cp37-win32.pyd +0 -0
  2. maxframe/codegen.py +0 -4
  3. maxframe/config/config.py +34 -2
  4. maxframe/config/validators.py +1 -0
  5. maxframe/conftest.py +2 -0
  6. maxframe/core/entity/objects.py +1 -1
  7. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  8. maxframe/dataframe/__init__.py +1 -1
  9. maxframe/dataframe/arithmetic/around.py +5 -17
  10. maxframe/dataframe/arithmetic/core.py +15 -7
  11. maxframe/dataframe/arithmetic/docstring.py +5 -55
  12. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +22 -0
  13. maxframe/dataframe/core.py +5 -5
  14. maxframe/dataframe/datasource/date_range.py +2 -2
  15. maxframe/dataframe/datasource/read_odps_query.py +6 -0
  16. maxframe/dataframe/datasource/read_odps_table.py +2 -1
  17. maxframe/dataframe/datasource/tests/test_datasource.py +14 -0
  18. maxframe/dataframe/groupby/cum.py +0 -1
  19. maxframe/dataframe/groupby/tests/test_groupby.py +4 -0
  20. maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
  21. maxframe/dataframe/indexing/rename.py +3 -37
  22. maxframe/dataframe/indexing/sample.py +0 -1
  23. maxframe/dataframe/indexing/set_index.py +68 -1
  24. maxframe/dataframe/merge/merge.py +236 -2
  25. maxframe/dataframe/merge/tests/test_merge.py +123 -0
  26. maxframe/dataframe/misc/apply.py +3 -10
  27. maxframe/dataframe/misc/case_when.py +1 -1
  28. maxframe/dataframe/misc/describe.py +2 -2
  29. maxframe/dataframe/misc/drop_duplicates.py +4 -25
  30. maxframe/dataframe/misc/eval.py +4 -0
  31. maxframe/dataframe/misc/pct_change.py +1 -83
  32. maxframe/dataframe/misc/transform.py +1 -30
  33. maxframe/dataframe/misc/value_counts.py +4 -17
  34. maxframe/dataframe/missing/dropna.py +1 -1
  35. maxframe/dataframe/missing/fillna.py +5 -5
  36. maxframe/dataframe/sort/sort_values.py +1 -11
  37. maxframe/dataframe/statistics/quantile.py +5 -17
  38. maxframe/dataframe/utils.py +4 -7
  39. maxframe/learn/contrib/xgboost/dmatrix.py +2 -2
  40. maxframe/learn/contrib/xgboost/predict.py +2 -2
  41. maxframe/learn/contrib/xgboost/train.py +2 -2
  42. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  43. maxframe/odpsio/__init__.py +1 -1
  44. maxframe/odpsio/arrow.py +8 -4
  45. maxframe/odpsio/schema.py +10 -7
  46. maxframe/odpsio/tableio.py +388 -14
  47. maxframe/odpsio/tests/test_schema.py +16 -15
  48. maxframe/odpsio/tests/test_tableio.py +48 -21
  49. maxframe/protocol.py +40 -2
  50. maxframe/serialization/core.cp37-win32.pyd +0 -0
  51. maxframe/serialization/serializables/core.py +48 -9
  52. maxframe/tensor/__init__.py +59 -0
  53. maxframe/tensor/base/unique.py +2 -2
  54. maxframe/tensor/statistics/quantile.py +2 -2
  55. maxframe/tests/utils.py +11 -2
  56. maxframe/utils.py +17 -9
  57. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc2.dist-info}/METADATA +74 -1
  58. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc2.dist-info}/RECORD +64 -64
  59. maxframe_client/fetcher.py +38 -27
  60. maxframe_client/session/odps.py +5 -5
  61. maxframe_client/tests/test_fetcher.py +21 -3
  62. maxframe_client/tests/test_session.py +13 -2
  63. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc2.dist-info}/WHEEL +0 -0
  64. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc2.dist-info}/top_level.txt +0 -0
maxframe/odpsio/tests/test_tableio.py CHANGED
@@ -12,22 +12,37 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import datetime
+
 import numpy as np
 import pandas as pd
 import pyarrow as pa
+import pytest
 from odps import ODPS
 
+from ...config import options
 from ...tests.utils import flaky, tn
 from ...utils import config_odps_default_options
-from ..tableio import HaloTableIO
+from ..tableio import ODPSTableIO
+
+
+@pytest.fixture
+def switch_table_io(request):
+    old_use_common_table = options.use_common_table
+    try:
+        options.use_common_table = request.param
+        yield
+    finally:
+        options.use_common_table = old_use_common_table
 
 
 @flaky(max_runs=3)
-def test_empty_table_io():
+@pytest.mark.parametrize("switch_table_io", [False, True], indirect=True)
+def test_empty_table_io(switch_table_io):
     config_odps_default_options()
 
     o = ODPS.from_environments()
-    halo_table_io = HaloTableIO(o)
+    table_io = ODPSTableIO(o)
 
     # test read from empty table
     empty_table_name = tn("test_empty_table_halo_read")
@@ -35,42 +50,53 @@ def test_empty_table_io():
     tb = o.create_table(empty_table_name, "col1 string", lifecycle=1)
 
     try:
-        with halo_table_io.open_reader(empty_table_name) as reader:
+        with table_io.open_reader(empty_table_name) as reader:
             assert len(reader.read_all()) == 0
     finally:
         tb.drop()
 
 
 @flaky(max_runs=3)
-def test_table_io_without_parts():
+@pytest.mark.parametrize("switch_table_io", [False, True], indirect=True)
+def test_table_io_without_parts(switch_table_io):
     config_odps_default_options()
 
     o = ODPS.from_environments()
-    halo_table_io = HaloTableIO(o)
+    table_io = ODPSTableIO(o)
 
     # test read and write tables without partition
     no_part_table_name = tn("test_no_part_halo_write")
     o.delete_table(no_part_table_name, if_exists=True)
-    tb = o.create_table(
-        no_part_table_name, ",".join(f"{c} double" for c in "abcde"), lifecycle=1
-    )
+    col_desc = ",".join(f"{c} double" for c in "abcde") + ", f datetime"
+    tb = o.create_table(no_part_table_name, col_desc, lifecycle=1)
 
     try:
         pd_data = pd.DataFrame(np.random.rand(100, 5), columns=list("abcde"))
-        with halo_table_io.open_writer(no_part_table_name) as writer:
+        date_val = [
+            (
+                datetime.datetime.now().replace(microsecond=0)
+                + datetime.timedelta(seconds=i)
+            )
+            for i in range(100)
+        ]
+        pd_data["f"] = pd.Series(date_val, dtype="datetime64[ms]").dt.tz_localize(
+            options.local_timezone
+        )
+        with table_io.open_writer(no_part_table_name) as writer:
             writer.write(pa.Table.from_pandas(pd_data, preserve_index=False))
-        with halo_table_io.open_reader(no_part_table_name) as reader:
+        with table_io.open_reader(no_part_table_name) as reader:
             pd.testing.assert_frame_equal(reader.read_all().to_pandas(), pd_data)
     finally:
         tb.drop()
 
 
 @flaky(max_runs=3)
-def test_table_io_with_range_reader():
+@pytest.mark.parametrize("switch_table_io", [False, True], indirect=True)
+def test_table_io_with_range_reader(switch_table_io):
     config_odps_default_options()
 
     o = ODPS.from_environments()
-    halo_table_io = HaloTableIO(o)
+    table_io = ODPSTableIO(o)
 
     # test read and write tables without partition
     no_part_table_name = tn("test_no_part_halo_write")
@@ -81,15 +107,15 @@ def test_table_io_with_range_reader():
 
     try:
         pd_data = pd.DataFrame(np.random.rand(100, 5), columns=list("abcde"))
-        with halo_table_io.open_writer(no_part_table_name) as writer:
+        with table_io.open_writer(no_part_table_name) as writer:
             writer.write(pa.Table.from_pandas(pd_data, preserve_index=False))
 
-        with halo_table_io.open_reader(
+        with table_io.open_reader(
             no_part_table_name, start=None, stop=100, row_batch_size=10
         ) as reader:
             pd.testing.assert_frame_equal(reader.read_all().to_pandas(), pd_data)
 
-        with halo_table_io.open_reader(
+        with table_io.open_reader(
             no_part_table_name,
             start=-2,
             stop=-52,
@@ -105,11 +131,12 @@ def test_table_io_with_range_reader():
 
 
 @flaky(max_runs=3)
-def test_table_io_with_parts():
+@pytest.mark.parametrize("switch_table_io", [False, True], indirect=True)
+def test_table_io_with_parts(switch_table_io):
     config_odps_default_options()
 
     o = ODPS.from_environments()
-    halo_table_io = HaloTableIO(o)
+    table_io = ODPSTableIO(o)
 
     # test read and write tables with partition
     parted_table_name = tn("test_parted_halo_write")
@@ -122,11 +149,11 @@ def test_table_io_with_parts():
 
     try:
         pd_data = pd.DataFrame(np.random.rand(100, 5), columns=list("abcde"))
-        with halo_table_io.open_writer(parted_table_name, "pt=test") as writer:
+        with table_io.open_writer(parted_table_name, "pt=test") as writer:
             writer.write(pa.Table.from_pandas(pd_data, preserve_index=False))
-        with halo_table_io.open_reader(parted_table_name, "pt=test") as reader:
+        with table_io.open_reader(parted_table_name, "pt=test") as reader:
             pd.testing.assert_frame_equal(reader.read_all().to_pandas(), pd_data)
-        with halo_table_io.open_reader(
+        with table_io.open_reader(
             parted_table_name, "pt=test", partition_columns=True
         ) as reader:
             expected_data = pd_data.copy()
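Note: the new datetime column exercises timezone-aware, millisecond-resolution timestamps through the Arrow writer. A minimal standalone sketch of the same pandas preparation step (pandas >= 2.0 assumed for the non-nanosecond dtype; "Asia/Shanghai" stands in for options.local_timezone):

import datetime

import pandas as pd

# Truncate to whole seconds, store at millisecond resolution, then
# localize so the Arrow round trip compares equal wall-clock values.
base = datetime.datetime.now().replace(microsecond=0)
vals = [base + datetime.timedelta(seconds=i) for i in range(3)]
series = pd.Series(vals, dtype="datetime64[ms]").dt.tz_localize("Asia/Shanghai")
print(series.dtype)  # datetime64[ms, Asia/Shanghai]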
maxframe/protocol.py CHANGED
@@ -154,6 +154,9 @@ class ODPSTableResultInfo(ResultInfo):
     partition_specs: Optional[List[str]] = ListField(
         "partition_specs", FieldTypes.string, default=None
     )
+    table_meta: Optional["DataFrameTableMeta"] = ReferenceField(
+        "table_meta", default=None
+    )
 
     def __init__(self, result_type: ResultType = None, **kw):
         result_type = result_type or ResultType.ODPS_TABLE
@@ -164,8 +167,17 @@ class ODPSTableResultInfo(ResultInfo):
         ret["full_table_name"] = self.full_table_name
         if self.partition_specs:
             ret["partition_specs"] = self.partition_specs
+        if self.table_meta:
+            ret["table_meta"] = self.table_meta.to_json()
         return ret
 
+    @classmethod
+    def _json_to_kwargs(cls, serialized: dict) -> dict:
+        kw = super()._json_to_kwargs(serialized)
+        if "table_meta" in kw:
+            kw["table_meta"] = DataFrameTableMeta.from_json(kw["table_meta"])
+        return kw
+
 
 class ODPSVolumeResultInfo(ResultInfo):
     _result_type = ResultType.ODPS_VOLUME
@@ -469,7 +481,7 @@ class DecrefRequest(Serializable):
     keys: List[str] = ListField("keys", FieldTypes.string, default=None)
 
 
-class DataFrameTableMeta(Serializable):
+class DataFrameTableMeta(JsonSerializable):
     __slots__ = "_pd_column_names", "_pd_index_level_names"
 
     table_name: Optional[str] = StringField("table_name", default=None)
@@ -500,7 +512,7 @@ class DataFrameTableMeta(Serializable):
         self._pd_index_level_names = self.pd_index_dtypes.index.tolist()
         return self._pd_index_level_names
 
-    def __eq__(self, other: "Serializable") -> bool:
+    def __eq__(self, other: "DataFrameTableMeta") -> bool:
         if not isinstance(other, type(self)):
             return False
         for k in self._FIELDS:
@@ -511,3 +523,29 @@ class DataFrameTableMeta(Serializable):
             if not is_same:
                 return False
         return True
+
+    def to_json(self) -> dict:
+        b64_pk = lambda x: base64.b64encode(pickle.dumps(x))
+        ret = {
+            "table_name": self.table_name,
+            "type": self.type.value,
+            "table_column_names": self.table_column_names,
+            "table_index_column_names": self.table_index_column_names,
+            "pd_column_dtypes": b64_pk(self.pd_column_dtypes),
+            "pd_column_level_names": b64_pk(self.pd_column_level_names),
+            "pd_index_dtypes": b64_pk(self.pd_index_dtypes),
+        }
+        return ret
+
+    @classmethod
+    def from_json(cls, serialized: dict) -> "DataFrameTableMeta":
+        b64_upk = lambda x: pickle.loads(base64.b64decode(x))
+        serialized.update(
+            {
+                "type": OutputType(serialized["type"]),
+                "pd_column_dtypes": b64_upk(serialized["pd_column_dtypes"]),
+                "pd_column_level_names": b64_upk(serialized["pd_column_level_names"]),
+                "pd_index_dtypes": b64_upk(serialized["pd_index_dtypes"]),
+            }
+        )
+        return DataFrameTableMeta(**serialized)
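Note: the new to_json/from_json pair keeps plain fields as JSON values and pickles the pandas dtype containers behind base64. A minimal sketch of just that encoding step, independent of DataFrameTableMeta itself (the sample dtypes are illustrative):

import base64
import pickle

import pandas as pd

# dtype containers are not JSON-native, so they are pickled and then
# base64-encoded; decoding reverses both steps losslessly.
dtypes = pd.Series([pd.Int64Dtype(), pd.StringDtype()], index=["a", "b"])
encoded = base64.b64encode(pickle.dumps(dtypes))
restored = pickle.loads(base64.b64decode(encoded))
assert restored.equals(dtypes)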
maxframe/serialization/core.cp37-win32.pyd CHANGED
Binary file
maxframe/serialization/serializables/core.py CHANGED
@@ -51,7 +51,10 @@ def _is_field_primitive_compound(field: Field):
 class SerializableMeta(type):
     def __new__(mcs, name: str, bases: Tuple[Type], properties: Dict):
         # All the fields including misc fields.
-        name_hash = hash(f"{properties.get('__module__')}.{name}")
+        legacy_name_hash = hash(f"{properties.get('__module__')}.{name}")
+        name_hash = hash(
+            f"{properties.get('__module__')}.{properties.get('__qualname__')}"
+        )
         all_fields = dict()
         # mapping field names to base classes
         field_to_cls_hash = dict()
@@ -107,6 +110,10 @@ class SerializableMeta(type):
         slots.update(properties_field_slot_names)
 
         properties = properties_without_fields
+
+        # todo remove this prop when all versions below v1.0.0rc1 is eliminated
+        properties["_LEGACY_NAME_HASH"] = legacy_name_hash
+
         properties["_NAME_HASH"] = name_hash
         properties["_FIELDS"] = all_fields
         properties["_FIELD_ORDER"] = field_order
@@ -210,8 +217,8 @@ class SerializableSerializer(Serializer):
     """
 
     @classmethod
-    def _get_obj_field_count_key(cls, obj: Serializable):
-        return f"FC_{obj._NAME_HASH}"
+    def _get_obj_field_count_key(cls, obj: Serializable, legacy: bool = False):
+        return f"FC_{obj._NAME_HASH if not legacy else obj._LEGACY_NAME_HASH}"
 
     @classmethod
     def _get_field_values(cls, obj: Serializable, fields):
@@ -290,6 +297,12 @@ class SerializableSerializer(Serializer):
         server_cls_to_field_count = obj_class._CLS_TO_NON_PRIMITIVE_FIELD_COUNT
         server_fields = obj_class._NON_PRIMITIVE_FIELDS
 
+        legacy_to_new_hash = {
+            c._LEGACY_NAME_HASH: c._NAME_HASH
+            for c in obj_class.__mro__
+            if hasattr(c, "_NAME_HASH") and c._LEGACY_NAME_HASH != c._NAME_HASH
+        }
+
         if client_cls_to_field_count:
             field_num, server_field_num = 0, 0
             for cls_hash, count in client_cls_to_field_count.items():
@@ -301,20 +314,40 @@ class SerializableSerializer(Serializer):
                 if not is_primitive or value != {}:
                     cls._set_field_value(obj, field, value)
                 field_num += count
-                server_field_num += server_cls_to_field_count[cls_hash]
+                try:
+                    server_field_num += server_cls_to_field_count[cls_hash]
+                except KeyError:
+                    try:
+                        # todo remove this fallback when all
+                        # versions below v1.0.0rc1 is eliminated
+                        server_field_num += server_cls_to_field_count[
+                            legacy_to_new_hash[cls_hash]
+                        ]
+                    except KeyError:
+                        # it is possible that certain type of field does not exist
+                        # at server side
+                        pass
         else:
+            # handle legacy serialization style, with all fields sorted by name
             # todo remove this branch when all versions below v0.1.0b5 is eliminated
             from .field import AnyField
 
-            # legacy serialization style, with all fields sorted by name
             if is_primitive:
-                field_attr = "_legacy_deprecated_primitives"
+                new_field_attr = "_legacy_new_primitives"
+                deprecated_field_attr = "_legacy_deprecated_primitives"
            else:
-                field_attr = "_legacy_deprecated_non_primitives"
+                new_field_attr = "_legacy_new_non_primitives"
+                deprecated_field_attr = "_legacy_deprecated_non_primitives"
+
+            # remove fields added on later releases
+            new_names = set(getattr(obj_class, new_field_attr, None) or [])
+            server_fields = [f for f in server_fields if f.name not in new_names]
+
+            # fill fields deprecated on later releases
             deprecated_fields = []
             deprecated_names = set()
-            if hasattr(obj_class, field_attr):
-                deprecated_names = set(getattr(obj_class, field_attr))
+            if hasattr(obj_class, deprecated_field_attr):
+                deprecated_names = set(getattr(obj_class, deprecated_field_attr))
             for field_name in deprecated_names:
                 field = AnyField(tag=field_name)
                 field.name = field_name
@@ -342,6 +375,12 @@ class SerializableSerializer(Serializer):
         field_count_data = self.get_public_data(
             context, self._get_obj_field_count_key(obj)
         )
+        if field_count_data is None:
+            # todo remove this fallback when all
+            # versions below v1.0.0rc1 is eliminated
+            field_count_data = self.get_public_data(
+                context, self._get_obj_field_count_key(obj, legacy=True)
+            )
         if field_count_data is not None:
             cls_to_prim_key, cls_to_non_prim_key = msgpack.loads(field_count_data)
             cls_to_prim_key = dict(cls_to_prim_key)
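Note: the dual-hash scheme matters for nested classes, where __name__ alone is ambiguous while __qualname__ carries the nesting path. A toy illustration (class names hypothetical):

class Outer:
    class Meta:
        pass


class Other:
    class Meta:
        pass


# module + __name__ is identical for the two nested classes, so the
# legacy hash collides; module + __qualname__ distinguishes them, which
# is why the old hash is kept only as a fallback for pre-1.0.0rc1 peers.
legacy_a = f"{Outer.Meta.__module__}.{Outer.Meta.__name__}"
legacy_b = f"{Other.Meta.__module__}.{Other.Meta.__name__}"
assert legacy_a == legacy_b

new_a = f"{Outer.Meta.__module__}.{Outer.Meta.__qualname__}"
new_b = f"{Other.Meta.__module__}.{Other.Meta.__qualname__}"
assert new_a != new_b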
maxframe/tensor/__init__.py CHANGED
@@ -180,4 +180,63 @@ from .reduction import std, sum, var
 from .reshape import reshape
 from .ufunc import ufunc
 
+# isort: off
+# noinspection PyUnresolvedReferences
+from numpy import (
+    NAN,
+    NINF,
+    AxisError,
+    Inf,
+    NaN,
+    e,
+    errstate,
+    geterr,
+    inf,
+    nan,
+    newaxis,
+    pi,
+    seterr,
+)
+
+# import numpy types
+# noinspection PyUnresolvedReferences
+from numpy import (
+    bool_ as bool,
+    bytes_,
+    cfloat,
+    character,
+    complex64,
+    complex128,
+    complexfloating,
+    datetime64,
+    double,
+    dtype,
+    flexible,
+    float16,
+    float32,
+    float64,
+    floating,
+    generic,
+    inexact,
+    int8,
+    int16,
+    int32,
+    int64,
+    intc,
+    intp,
+    number,
+    integer,
+    object_ as object,
+    signedinteger,
+    timedelta64,
+    uint,
+    uint8,
+    uint16,
+    uint32,
+    uint64,
+    unicode_,
+    unsignedinteger,
+    void,
+)
+
 del fetch, ufunc
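Note: with these re-exports, numpy constants and scalar types resolve through the tensor namespace, mirroring numpy for drop-in use. A usage sketch (numpy < 2.0 assumed, since aliases such as NAN and unicode_ were removed in numpy 2.0; mt.tensor is assumed from the existing datasource API):

import maxframe.tensor as mt

# constants and dtypes now mirror the numpy namespace
print(mt.pi, mt.inf, mt.newaxis)
t = mt.tensor([1, 2, 3], dtype=mt.int64)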
maxframe/tensor/base/unique.py CHANGED
@@ -15,7 +15,7 @@
 
 import numpy as np
 
-from ... import opcodes as OperandDef
+from ... import opcodes
 from ...serialization.serializables import BoolField, Int32Field
 from ..core import TensorOrder
 from ..operators import TensorHasInput, TensorOperatorMixin
@@ -23,7 +23,7 @@ from ..utils import validate_axis
 
 
 class TensorUnique(TensorHasInput, TensorOperatorMixin):
-    _op_type_ = OperandDef.UNIQUE
+    _op_type_ = opcodes.UNIQUE
 
     return_index = BoolField("return_index", default=False)
     return_inverse = BoolField("return_inverse", default=False)
maxframe/tensor/statistics/quantile.py CHANGED
@@ -16,7 +16,7 @@ from collections.abc import Iterable
 
 import numpy as np
 
-from ... import opcodes as OperandDef
+from ... import opcodes
 from ...core import ENTITY_TYPE
 from ...serialization.serializables import AnyField, BoolField, KeyField, StringField
 from ..core import TENSOR_TYPE, TensorOrder
@@ -43,7 +43,7 @@ q_error_msg = "Quantiles must be in the range [0, 1]"
 
 class TensorQuantile(TensorOperator, TensorOperatorMixin):
     __slots__ = ("q_error_msg",)
-    _op_type_ = OperandDef.QUANTILE
+    _op_type_ = opcodes.QUANTILE
 
     a = KeyField("a")
     q = AnyField("q")
maxframe/tests/utils.py CHANGED
@@ -14,6 +14,7 @@
 
 import asyncio
 import functools
+import hashlib
 import os
 import queue
 import socket
@@ -25,7 +26,7 @@ import pytest
 from tornado import netutil
 
 from ..core import Tileable, TileableGraph
-from ..utils import create_event, lazy_import
+from ..utils import create_sync_primitive, lazy_import, to_binary
 
 try:
     from flaky import flaky
@@ -102,7 +103,7 @@ def run_app_in_thread(app_func):
     def fixture_func(*args, **kwargs):
         app_loop = asyncio.new_event_loop()
         q = queue.Queue()
-        exit_event = create_event(app_loop)
+        exit_event = create_sync_primitive(asyncio.Event, app_loop)
         app_thread = Thread(
             name="TestAppThread",
             target=app_thread_func,
@@ -162,3 +163,11 @@ def require_hadoop(func):
         not os.environ.get("WITH_HADOOP"), reason="Only run when hadoop is installed"
     )(func)
     return func
+
+
+def get_test_unique_name(size=None):
+    test_name = os.getenv("PYTEST_CURRENT_TEST", "pyodps_test")
+    digest = hashlib.md5(to_binary(test_name)).hexdigest()
+    if size:
+        digest = digest[:size]
+    return digest + "_" + str(os.getpid())
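Note: get_test_unique_name derives a per-test, per-process suffix from PYTEST_CURRENT_TEST, so parallel parametrized runs get distinct table names. A usage sketch (pytest normally sets the variable; it is forced here only to keep the example self-contained):

import os

os.environ["PYTEST_CURRENT_TEST"] = "test_tableio.py::test_empty_table_io"

from maxframe.tests.utils import get_test_unique_name

# md5 of the test name truncated to 8 hex chars plus the pid,
# e.g. "0f6c2d1a_4242"
print(get_test_unique_name(size=8))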
maxframe/utils.py CHANGED
@@ -436,19 +436,27 @@ async def to_thread_pool(func, *args, pool=None, **kwargs):
     return await loop.run_in_executor(pool, func_call)
 
 
-def create_event(loop: asyncio.AbstractEventLoop) -> asyncio.Event:
+_PrimitiveType = TypeVar("_PrimitiveType")
+
+
+def create_sync_primitive(
+    cls: Type[_PrimitiveType], loop: asyncio.AbstractEventLoop
+) -> _PrimitiveType:
     """
-    Create an asyncio.Event in a certain event loop.
+    Create an asyncio sync primitive (locks, events, etc.)
+    in a certain event loop.
     """
-    if sys.version_info[1] < 10 or loop is None:
-        return asyncio.Event(loop=loop)
+    if sys.version_info[1] < 10:
+        return cls(loop=loop)
 
     # From Python3.10 the loop parameter has been removed. We should work around here.
-    old_loop = asyncio.get_running_loop()
-    asyncio.set_event_loop(loop)
-    event = asyncio.Event()
-    asyncio.set_event_loop(old_loop)
-    return event
+    old_loop = asyncio.get_event_loop()
+    try:
+        asyncio.set_event_loop(loop)
+        primitive = cls()
+    finally:
+        asyncio.set_event_loop(old_loop)
+    return primitive
 
 
 class ToThreadCancelledError(asyncio.CancelledError):
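Note: the generalized helper accepts any loop-bound asyncio primitive class, not just Event. A usage sketch:

import asyncio

from maxframe.utils import create_sync_primitive

# Works for Event, Lock, Condition, etc.; on Python < 3.10 the loop is
# passed directly, on newer versions it is installed temporarily.
loop = asyncio.new_event_loop()
exit_event = create_sync_primitive(asyncio.Event, loop)
write_lock = create_sync_primitive(asyncio.Lock, loop)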
{maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc2.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: maxframe
-Version: 1.0.0rc1
+Version: 1.0.0rc2
 Summary: MaxFrame operator-based data analyze framework
 Requires-Dist: numpy <2.0.0,>=1.19.0
 Requires-Dist: pandas >=1.0.0
@@ -102,3 +102,76 @@ License
 
 Licensed under the `Apache License
 2.0 <https://www.apache.org/licenses/LICENSE-2.0.html>`__.
+MaxCompute MaxFrame Client
+==========================
+
+MaxFrame is a computational framework created by Alibaba Cloud to
+provide a way for Python developers to parallelize their code with
+MaxCompute. It creates a runnable computation graph locally, submits it
+to MaxCompute to execute and obtains results from MaxCompute.
+
+MaxFrame client is the client of MaxFrame. Currently it provides a
+DataFrame-based SDK with compatible APIs for pandas. In future, other
+common Python libraries like numpy and scikit-learn will be added as
+well. Python 3.7 is recommended for MaxFrame client to enable all
+functionalities while supports for higher Python versions are on the
+way.
+
+Installation
+------------
+
+You may install MaxFrame client through PIP:
+
+.. code:: bash
+
+   pip install maxframe
+
+Latest beta version can be installed with ``--pre`` argument:
+
+.. code:: bash
+
+   pip install --pre maxframe
+
+You can also install MaxFrame client from source code:
+
+.. code:: bash
+
+   pip install git+https://github.com/aliyun/alibabacloud-odps-maxframe-client.git
+
+Getting started
+---------------
+
+We show a simple code example of MaxFrame client which read data from a
+MaxCompute table, performs some simple data transform and writes back
+into MaxCompute.
+
+.. code:: python
+
+   import maxframe.dataframe as md
+   import os
+   from maxframe import new_session
+   from odps import ODPS
+
+   o = ODPS(
+       os.getenv('ALIBABA_CLOUD_ACCESS_KEY_ID'),
+       os.getenv('ALIBABA_CLOUD_ACCESS_KEY_SECRET'),
+       project='your-default-project',
+       endpoint='your-end-point',
+   )
+   session = new_session(o)
+
+   df = md.read_odps_table("source_table")
+   df["A"] = "prefix_" + df["A"]
+   md.to_odps_table(df, "prefix_source_table")
+
+Documentation
+-------------
+
+Detailed documentations can be found
+`here <https://maxframe.readthedocs.io>`__.
+
+License
+-------
+
+Licensed under the `Apache License
+2.0 <https://www.apache.org/licenses/LICENSE-2.0.html>`__.
+ 2.0 <https://www.apache.org/licenses/LICENSE-2.0.html>`__.