PyStellarDB 0.11.0__py2.py3-none-any.whl → 0.13.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2014 Transwarp, Inc.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
@@ -1,37 +1,34 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: PyStellarDB
3
- Version: 0.11.0
3
+ Version: 0.13.2
4
4
  Summary: Python interface to StellarDB
5
5
  Home-page: https://github.com/WarpCloud/PyStellarDB
6
6
  Author: Zhiping Wang
7
7
  Author-email: zhiping.wang@transwarp.io
8
8
  License: Apache License, Version 2.0
9
- Platform: UNKNOWN
10
9
  Classifier: Intended Audience :: Developers
11
10
  Classifier: License :: OSI Approved :: Apache Software License
12
11
  Classifier: Operating System :: OS Independent
13
12
  Classifier: Topic :: Database :: Front-Ends
14
- Requires-Python: >=2.7,<=3.7
15
- Provides-Extra: hive
16
- Provides-Extra: sqlalchemy
17
- Provides-Extra: kerberos
18
- Provides-Extra: presto
13
+ Requires-Python: >=2.7
14
+ License-File: LICENSE
19
15
  Requires-Dist: future
20
16
  Requires-Dist: python-dateutil
21
17
  Requires-Dist: pyhive
22
18
  Requires-Dist: sasl
23
19
  Requires-Dist: thrift
24
- Requires-Dist: thrift-sasl (>=0.3.0)
25
- Requires-Dist: pyspark (>=2.4.0)
20
+ Requires-Dist: thrift-sasl >=0.3.0
26
21
  Provides-Extra: hive
27
- Requires-Dist: sasl (>=0.2.1); extra == 'hive'
28
- Requires-Dist: thrift (>=0.10.0); extra == 'hive'
22
+ Requires-Dist: sasl >=0.2.1 ; extra == 'hive'
23
+ Requires-Dist: thrift >=0.10.0 ; extra == 'hive'
29
24
  Provides-Extra: kerberos
30
- Requires-Dist: requests-kerberos (>=0.12.0); extra == 'kerberos'
25
+ Requires-Dist: requests-kerberos >=0.12.0 ; extra == 'kerberos'
31
26
  Provides-Extra: presto
32
- Requires-Dist: requests (>=1.0.0); extra == 'presto'
27
+ Requires-Dist: requests >=1.0.0 ; extra == 'presto'
28
+ Provides-Extra: pyspark
29
+ Requires-Dist: pyspark >=2.4.0 ; extra == 'pyspark'
33
30
  Provides-Extra: sqlalchemy
34
- Requires-Dist: sqlalchemy (>=1.3.0); extra == 'sqlalchemy'
31
+ Requires-Dist: sqlalchemy >=1.3.0 ; extra == 'sqlalchemy'
35
32
 
36
33
  PyStellarDB
37
34
  ===========
@@ -128,7 +125,7 @@ Execute Graph Query and change to a PySpark RDD object
128
125
 
129
126
  from pyspark import SparkContext
130
127
  from pystellardb import stellar_hive
131
-
128
+
132
129
  sc = SparkContext("local", "Demo App")
133
130
 
134
131
  conn = stellar_hive.StellarConnection(host="localhost", port=10000, graph_name='pokemon')
@@ -153,7 +150,7 @@ Execute Hive Query and change to a PySpark RDD object
153
150
 
154
151
  from pyspark import SparkContext
155
152
  from pystellardb import stellar_hive
156
-
153
+
157
154
  sc = SparkContext("local", "Demo App")
158
155
 
159
156
  conn = stellar_hive.StellarConnection(host="localhost", port=10000)
@@ -174,15 +171,11 @@ Dependencies
174
171
  Required:
175
172
  ------------
176
173
 
177
- - Python 2.7+ / Less than Python 3.7
174
+ - Python 2.7+ / Python 3
178
175
 
179
176
  System SASL
180
177
  ------------
181
178
 
182
- Different systems require different packages to be installed to enable SASL support.
183
- Some examples of how to install the packages on different distributions
184
- follow.
185
-
186
179
  Ubuntu:
187
180
 
188
181
  .. code-block:: bash
@@ -197,14 +190,14 @@ RHEL/CentOS:
197
190
  yum install cyrus-sasl-md5 cyrus-sasl-plain cyrus-sasl-gssapi cyrus-sasl-devel
198
191
  yum install gcc-c++ python-devel.x86_64 #Update python and gcc if needed
199
192
 
200
- # If your Python environment is 3.X, then you may need to compile and reinstall Python
201
193
  # if pip3 install fails with a message like 'Can't connect to HTTPS URL because the SSL module is not available'
194
+ # you may need to update OpenSSL & reinstall Python
202
195
 
203
196
  # 1. Download a higher version of openssl, e.g: https://www.openssl.org/source/openssl-1.1.1k.tar.gz
204
197
  # 2. Install openssl: ./config && make && make install
205
198
  # 3. Link openssl: echo /usr/local/lib64/ > /etc/ld.so.conf.d/openssl-1.1.1.conf
206
199
  # 4. Update dynamic lib: ldconfig -v
207
- # 5. Download a Python source package
200
+ # 5. Uninstall Python & download a new Python source package
208
201
  # 6. vim Modules/Setup, search '_socket socketmodule.c', uncomment
209
202
  # _socket socketmodule.c
210
203
  # SSL=/usr/local/ssl
@@ -221,13 +214,12 @@ Windows:
221
214
  # There are 3 ways of installing sasl for python on windows
222
215
  # 1. (recommended) Download a .whl version of sasl from https://www.lfd.uci.edu/~gohlke/pythonlibs/#sasl
223
216
  # 2. (recommended) If using anaconda, use conda install sasl.
224
- # 3. Install Microsoft Visual C++ 9.0/14.0 buildtools for python2.7/3.x, then pip install sasl(under test).
217
+ # 3. Install Microsoft Visual C++ 9.0/14.0 buildtools for python2.7/3.x, then pip install sasl.
225
218
 
226
219
  Notices
227
220
  =======
228
221
 
229
- If you install pystellardb >= 0.9, then it will install a beeline command into system.
230
- Delete /usr/local/bin/beeline if you don't need it.
222
+ PyStellarDB >= 0.9 installs a beeline command at /usr/local/bin/beeline.
231
223
 
232
224
  Requirements
233
225
  ============
@@ -244,12 +236,12 @@ PyHive works with
244
236
  Windows Kerberos Configuration
245
237
  ==============================
246
238
 
247
- If you're connecting to databases using Kerberos authentication from Windows platform,
248
- you'll need to install & configure Kerberos for Windows first.
239
+ Windows Kerberos configuration can be a little bit tricky and may need a few instructions.
240
+ First, you'll need to install & configure Kerberos for Windows.
249
241
  Get it from http://web.mit.edu/kerberos/dist/
250
242
 
251
243
  After installation, configure the environment variables.
252
- Make sure your Kerberos variable is set ahead of JDK variable(If you have JDK), because JDK also has kinit etc.
244
+ Make sure your Kerberos variable is positioned ahead of the JDK variable, so that the kinit command located in the JDK path is not used.
253
245
 
254
246
  Find /etc/krb5.conf on your KDC, copy it into krb5.ini on Windows with some modifications.
255
247
  e.g.(krb5.conf on KDC):
@@ -298,7 +290,7 @@ Modify it, delete [logging] and default_ccache_name in [libdefaults]:
298
290
  kdc = host2:1088
299
291
  }
300
292
 
301
- This is your krb5.ini for Windows Kerberos. Put it at those 3 places:
293
+ The above is your krb5.ini for Kerberos on Windows. Put it in these 3 places:
302
294
 
303
295
  C:\ProgramData\MIT\Kerberos5\krb5.ini
304
296
 
@@ -307,7 +299,7 @@ This is your krb5.ini for Windows Kerberos. Put it at those 3 places:
307
299
  C:\Windows\krb5.ini
308
300
 
309
301
 
310
- Finally, configure hosts at: C:/Windows/System32/drivers/etc/hosts
302
+ Finally, configure the hosts file at: C:/Windows/System32/drivers/etc/hosts
311
303
  Add ip mappings of host1, host2 in the previous example. e.g.
312
304
 
313
305
  .. code-block:: bash
@@ -315,11 +307,9 @@ Add ip mappings of host1, host2 in the previous example. e.g.
315
307
  10.6.6.96 host1
316
308
  10.6.6.97 host2
317
309
 
318
- Now, you can run kinit in the command line!
310
+ Now, you can try running kinit in your command line!
319
311
 
320
312
  Testing
321
313
  =======
322
314
 
323
315
  On the way
324
-
325
-
@@ -0,0 +1,10 @@
1
+ pystellardb/__init__.py,sha256=JOl41NviMN-qDV0Z8ZPmhNIxvgyauGGJHdB4A-8MhqM,93
2
+ pystellardb/_version.py,sha256=Vt7qCjCMBamE10PReIKwIvI02pMh8mdLJE8ZY2c6T54,498
3
+ pystellardb/graph_types.py,sha256=j9ZEvnTVRFOttg28rcYvOFzfoOBJcRxXxySKIzEcR-I,13098
4
+ pystellardb/stellar_hive.py,sha256=Bes99go4oKszP0RiD3OYG3W5g0Sx0cnaXf2yWOosXk0,14010
5
+ pystellardb/stellar_rdd.py,sha256=TYwsWYeCxfOliGq1kV3ArNXdye55cKWZF7s9M9nDdt4,1324
6
+ PyStellarDB-0.13.2.dist-info/LICENSE,sha256=1qDFxrywejs7xNBfOr6T-7lOuqDgSNIES77kTYege3w,560
7
+ PyStellarDB-0.13.2.dist-info/METADATA,sha256=sY89aLWXtPh-MetCwvHzXvwj37_-fqujLLoOqKjMaf8,9390
8
+ PyStellarDB-0.13.2.dist-info/WHEEL,sha256=_4XEmVmaBFWtekSGrbfOGNjC2I5lUr0lZSRblBllIFA,109
9
+ PyStellarDB-0.13.2.dist-info/top_level.txt,sha256=DRk-SeGVCdVAzv2CwFmdu75Yo7DgjUA3Hpu-9l8qPuU,12
10
+ PyStellarDB-0.13.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.31.1)
2
+ Generator: setuptools (70.1.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py2-none-any
5
5
  Tag: py3-none-any
pystellardb/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2022-03-22T19:24:40+0800",
11
+ "date": "2024-09-05T19:42:39+0800",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "f97fb8f8f488a4f3201f61b29a1bc421a3c88ac2",
15
- "version": "0.11.0"
14
+ "full-revisionid": "9e31319f3dbef3dc053f379b94f099e358d589a5",
15
+ "version": "0.13.2"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -18,6 +18,7 @@ class GraphElement(with_metaclass(abc.ABCMeta, object)):
18
18
  self._label = label
19
19
  self._fields = {}
20
20
  self._tags = []
21
+ self._rowKeyHexString = None
21
22
 
22
23
  def getLabel(self):
23
24
  return self._label
@@ -40,6 +41,12 @@ class GraphElement(with_metaclass(abc.ABCMeta, object)):
40
41
  def setTags(self, newTags):
41
42
  self._tags = newTags
42
43
 
44
+ def setRowKeyHexString(self, rowkey):
45
+ self._rowKeyHexString = rowkey
46
+
47
+ def getRowKeyHexString(self):
48
+ return self._rowKeyHexString
49
+
43
50
 
44
51
  class Vertex(GraphElement):
45
52
  """
@@ -58,6 +65,7 @@ class Vertex(GraphElement):
58
65
  'type': 'vertex',
59
66
  'label': self._label,
60
67
  'uid': self._uid,
68
+ 'RowKeyHexString': self._rowKeyHexString,
61
69
  }
62
70
 
63
71
  if self._tags is not None and len(self._tags) > 0:
@@ -69,7 +77,7 @@ class Vertex(GraphElement):
69
77
  return m
70
78
 
71
79
  def __str__(self):
72
- return json.dumps(self.toJSON())
80
+ return json.dumps(self.toJSON(), ensure_ascii=False)
73
81
 
74
82
  @staticmethod
75
83
  def parseVertexFromJson(json_str):
@@ -84,16 +92,22 @@ class Vertex(GraphElement):
84
92
  if 'labels' not in m:
85
93
  raise ValueError("Could not find label in JSON")
86
94
 
87
- if '__uid' not in m['properties']:
95
+ prop_dict = m['properties']
96
+
97
+ if '__uid' not in prop_dict:
88
98
  raise ValueError("Could not find uid in JSON")
89
99
 
90
- vertex = Vertex(m['properties']['__uid'], m['labels'][0])
100
+ vertex = Vertex(prop_dict['__uid'], m['labels'][0])
91
101
 
92
- for key in m['properties'].keys():
102
+ for key in prop_dict.keys():
93
103
  if key != '__uid' and key != '__tags':
94
- vertex.setFeild(key, m['properties'][key])
104
+ vertex.setFeild(key, prop_dict[key])
105
+
106
+ if '__tags' in prop_dict:
107
+ vertex.setTags(prop_dict['__tags'])
95
108
 
96
- vertex.setTags(m['properties']['__tags'])
109
+ rk = " ".join(map(lambda x: str(x), m['entityKey']))
110
+ vertex.setRowKeyHexString(rk)
97
111
 
98
112
  return vertex
99
113
 
@@ -110,6 +124,31 @@ class Vertex(GraphElement):
110
124
  label_in_little_endian.reverse()
111
125
  return int(binascii.hexlify(bytearray(label_in_little_endian)), 16)
112
126
 
127
+ @staticmethod
128
+ def parseShardIdFromRKV18(rk):
129
+ """Parse shard id from vertex row key in byte array for graphSchema V18"""
130
+ shard_id = (rk[0] & 0xFF) << 8
131
+ shard_id |= rk[1] & 0xF0
132
+ return int(shard_id >> 4)
133
+
134
+ @staticmethod
135
+ def parseLabelIdxFromRKV18(rk):
136
+ """Parse label index from vertex row key in byte array for graphSchema V18"""
137
+ label_index = (rk[1] & 0x0F) << 8
138
+ label_index |= rk[2] & 0xFF
139
+ return int(label_index)
140
+
141
+ @staticmethod
142
+ def parseInnerIdFromRKV18(rk, offset):
143
+ """Parse long type inner id from vertex row key in byte array for graphSchema V18"""
144
+ ID_LEN = 8
145
+ inner_id = rk[offset + ID_LEN - 1] & 0x00FF
146
+ inner_id |= (rk[offset + ID_LEN - 2] & 0x00FF) << 8
147
+ inner_id |= (rk[offset + ID_LEN - 3] & 0x00FF) << 16
148
+ inner_id |= (rk[offset + ID_LEN - 4] & 0x00FF) << 24
149
+ inner_id |= (rk[offset + ID_LEN - 5] & 0x00FF) << 32
150
+ return int(inner_id)
151
+
113
152
 
114
153
  class Edge(GraphElement):
115
154
  """
@@ -148,6 +187,7 @@ class Edge(GraphElement):
148
187
  'euid': self._uid,
149
188
  'startNode': self._startNode.toJSON(),
150
189
  'endNode': self._endNode.toJSON(),
190
+ 'RowKeyHexString': self._rowKeyHexString,
151
191
  }
152
192
 
153
193
  if self._tags is not None and len(self._tags) > 0:
@@ -159,7 +199,7 @@ class Edge(GraphElement):
159
199
  return m
160
200
 
161
201
  def __str__(self):
162
- return json.dumps(self.toJSON())
202
+ return json.dumps(self.toJSON(), ensure_ascii=False)
163
203
 
164
204
  @staticmethod
165
205
  def parseEdgeFromJson(schema, json_str):
@@ -176,27 +216,42 @@ class Edge(GraphElement):
176
216
 
177
217
  edge = Edge(m['labels'][0])
178
218
 
219
+ rk = " ".join(map(lambda x: str(x), m['entityKey']))
220
+ edge.setRowKeyHexString(rk)
221
+
222
+ prop_dict = m['properties']
223
+
179
224
  # parse start node
180
225
  if 'startKey' not in m:
181
226
  raise ValueError("Could not find start node entity key in JSON")
182
227
 
183
- startUid = Vertex.parseUidFromRK(m['startKey'])
184
- startLabelIdx = Vertex.parseLabelIdxFromRK(m['startKey'])
228
+ if schema.getVersion() == 18:
229
+ startUid = prop_dict['__srcuid']
230
+ startLabelIdx = Vertex.parseLabelIdxFromRKV18(m['startKey'])
231
+ else:
232
+ startUid = Vertex.parseUidFromRK(m['startKey'])
233
+ startLabelIdx = Vertex.parseLabelIdxFromRK(m['startKey'])
185
234
  startLabel = schema.getVertexLabel(startLabelIdx)
186
235
 
187
236
  if startLabel is None:
188
237
  raise ValueError(
189
238
  'Could not find start node label with label index `{}`'.format(
190
239
  startLabelIdx))
191
-
192
- edge.setStartNode(Vertex(startUid, startLabel))
240
+
241
+ start_node = Vertex(startUid, startLabel)
242
+ start_node.setRowKeyHexString(" ".join(map(lambda x: str(x), m['entityKey'][:8])))
243
+ edge.setStartNode(start_node)
193
244
 
194
245
  # parse end node
195
246
  if 'endKey' not in m:
196
247
  raise ValueError("Could not find end node entity key in JSON")
197
248
 
198
- endUid = Vertex.parseUidFromRK(m['endKey'])
199
- endLabelIdx = Vertex.parseLabelIdxFromRK(m['endKey'])
249
+ if schema.getVersion() == 18:
250
+ endUid = prop_dict['__dstuid']
251
+ endLabelIdx = Vertex.parseLabelIdxFromRKV18(m['endKey'])
252
+ else:
253
+ endUid = Vertex.parseUidFromRK(m['endKey'])
254
+ endLabelIdx = Vertex.parseLabelIdxFromRK(m['endKey'])
200
255
  endLabel = schema.getVertexLabel(endLabelIdx)
201
256
 
202
257
  if endLabel is None:
@@ -204,19 +259,22 @@ class Edge(GraphElement):
204
259
  'Could not find end node label with label index `{}`'.format(
205
260
  endLabelIdx))
206
261
 
207
- edge.setEndNode(Vertex(endUid, endLabel))
262
+ end_node = Vertex(endUid, endLabel)
263
+ end_node.setRowKeyHexString(" ".join(map(lambda x: str(x), m['entityKey'][8:16])))
264
+ edge.setEndNode(end_node)
208
265
 
209
266
  # parse extra edge id
210
- if '__uid' in m['properties']:
211
- edge.setUid(m['properties']['__uid'])
267
+ if '__uid' in prop_dict:
268
+ edge.setUid(prop_dict['__uid'])
212
269
 
213
270
  # parse properties
214
- for key in m['properties'].keys():
271
+ for key in prop_dict.keys():
215
272
  if key != '__uid' and key != '__tags':
216
273
  edge.setFeild(key, m['properties'][key])
217
274
 
218
275
  # parse tags
219
- edge.setTags(m['properties']['__tags'])
276
+ if '__tags' in prop_dict:
277
+ edge.setTags(prop_dict['__tags'])
220
278
 
221
279
  return edge
222
280
 
@@ -298,6 +356,9 @@ class GraphSchema(object):
298
356
 
299
357
  return None
300
358
 
359
+ def getVersion(self):
360
+ return self._schema_version
361
+
301
362
  def toJSON(self):
302
363
  m = {
303
364
  '__VERSION': self._schema_version,
@@ -320,7 +381,7 @@ class GraphSchema(object):
320
381
  return m
321
382
 
322
383
  def __str__(self):
323
- return json.dumps(self.toJSON())
384
+ return json.dumps(self.toJSON(), ensure_ascii=False)
324
385
 
325
386
  @staticmethod
326
387
  def parseSchemaFromJson(json_str):
@@ -305,7 +305,7 @@ class StellarCursor(hive.Cursor):
305
305
  elif type == 'int':
306
306
  return int(data)
307
307
  elif type == 'long':
308
- return long(data)
308
+ return int(data)
309
309
  elif type == 'float' or type == 'double':
310
310
  return float(data)
311
311
  elif type == 'CruxType:Node' or type == 'GraphNode':
@@ -324,9 +324,9 @@ class StellarCursor(hive.Cursor):
324
324
  def _parseList(self, type, data):
325
325
  """Parse 'CruxType:List' type"""
326
326
  parsed_data = json.loads(data)
327
- newType = type[len('CruxType:List') + 1:-2]
327
+ newType = type[len('CruxType:List') + 1:type.find('>')]
328
328
 
329
- return [self._convertData(newType, entry) for entry in parsed_data]
329
+ return [self._convertData(newType, json.dumps(entry)) for entry in parsed_data]
330
330
 
331
331
  def _parseMap(self, type, data):
332
332
  """Parse 'CruxType:Map' type"""
@@ -6,8 +6,13 @@ from __future__ import absolute_import
6
6
  import abc
7
7
  from future.utils import with_metaclass
8
8
  import logging
9
- from pyspark import RDD, SparkContext
10
- from pyspark.serializers import BatchedSerializer
9
+
10
+ try:
11
+ import pyspark
12
+ from pyspark import RDD, SparkContext
13
+ from pyspark.serializers import BatchedSerializer
14
+ except ImportError:
15
+ pyspark = None
11
16
 
12
17
  _logger = logging.getLogger(__name__)
13
18
 
@@ -20,6 +25,9 @@ def transformToRDD(cursor, sc, parallelism=1):
20
25
  param sc: SparkContext
21
26
  param parallelism: Parallelism of RDD
22
27
  """
28
+ if not pyspark:
29
+ raise ImportError("Could not import pyspark! Please run `pip install pyspark` first in your environment!")
30
+
23
31
  # Get all data from cursor
24
32
  data = cursor.fetchall()
25
33
 
@@ -1,9 +0,0 @@
1
- PyStellarDB-0.11.0.dist-info/METADATA,sha256=u6LLBzhsgZ1CfCagDPa2KyLveV_DdEpxJz0dhicxL0s,9690
2
- PyStellarDB-0.11.0.dist-info/RECORD,,
3
- PyStellarDB-0.11.0.dist-info/WHEEL,sha256=gduuPyBvFJQSQ0zdyxF7k0zynDXbIbvg5ZBHoXum5uk,110
4
- PyStellarDB-0.11.0.dist-info/top_level.txt,sha256=DRk-SeGVCdVAzv2CwFmdu75Yo7DgjUA3Hpu-9l8qPuU,12
5
- pystellardb/__init__.py,sha256=JOl41NviMN-qDV0Z8ZPmhNIxvgyauGGJHdB4A-8MhqM,93
6
- pystellardb/_version.py,sha256=tZcdkmH0v4bTLZeo-KGiXZxkD4WbeXBKdH2pUoCaDmA,498
7
- pystellardb/graph_types.py,sha256=uWBLqPJBKLJ3OoeyFa59thpka0fcYaAwDTcKBH9zeaE,10790
8
- pystellardb/stellar_hive.py,sha256=SMTM-C65kNA7fn0pziW_UNEk-GnxLAtt9Nt0Js1gzX8,13987
9
- pystellardb/stellar_rdd.py,sha256=IQjK0WDO2FaIERqT-cwkRFcoqCgwCpBZXGlcYEcdALI,1116