shareddata 2.0.30.tar.gz → 2.0.32.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {shareddata-2.0.30 → shareddata-2.0.32}/PKG-INFO +1 -1
  2. {shareddata-2.0.30 → shareddata-2.0.32}/setup.cfg +1 -1
  3. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/MultiProc.py +2 -2
  4. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedData.py +2 -2
  5. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataPeriod.py +5 -1
  6. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataRealTime.py +2 -2
  7. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataTable.py +5 -5
  8. shareddata-2.0.30/src/SharedData/SharedDataTimeSeries.py → shareddata-2.0.32/src/SharedData/SharedDataTimeSeries copy.py +12 -12
  9. shareddata-2.0.32/src/SharedData/SharedDataTimeSeries.py +572 -0
  10. {shareddata-2.0.30 → shareddata-2.0.32}/src/shareddata.egg-info/PKG-INFO +1 -1
  11. {shareddata-2.0.30 → shareddata-2.0.32}/src/shareddata.egg-info/SOURCES.txt +1 -0
  12. {shareddata-2.0.30 → shareddata-2.0.32}/LICENSE +0 -0
  13. {shareddata-2.0.30 → shareddata-2.0.32}/README.md +0 -0
  14. {shareddata-2.0.30 → shareddata-2.0.32}/pyproject.toml +0 -0
  15. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/Defaults.py +0 -0
  16. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/Logger.py +0 -0
  17. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/LoggerConsumerProcess.py +0 -0
  18. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/Metadata.py +0 -0
  19. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SeriesLib.py +0 -0
  20. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataAWSKinesis.py +0 -0
  21. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataAWSS3.py +0 -0
  22. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataFeeder.py +0 -0
  23. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataFrame.py +0 -0
  24. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataRealTimeProcess.py +0 -0
  25. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataTableIndex.py +0 -0
  26. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataTableIndexJit.py +0 -0
  27. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedNumpy.py +0 -0
  28. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/Utils.py +0 -0
  29. {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/__init__.py +0 -0
  30. {shareddata-2.0.30 → shareddata-2.0.32}/src/shareddata.egg-info/dependency_links.txt +0 -0
  31. {shareddata-2.0.30 → shareddata-2.0.32}/src/shareddata.egg-info/requires.txt +0 -0
  32. {shareddata-2.0.30 → shareddata-2.0.32}/src/shareddata.egg-info/top_level.txt +0 -0

{shareddata-2.0.30 → shareddata-2.0.32}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: shareddata
- Version: 2.0.30
+ Version: 2.0.32
  Summary: Shared Memory Database with S3 repository
  Home-page: https://github.com/jcarlitooliveira/SharedData
  Author: Jose Carlito de Oliveira Filho

{shareddata-2.0.30 → shareddata-2.0.32}/setup.cfg
@@ -1,6 +1,6 @@
  [metadata]
  name = shareddata
- version = 2.0.30
+ version = 2.0.32
  author = Jose Carlito de Oliveira Filho
  author_email = jcarlitooliveira@gmail.com
  description = Shared Memory Database with S3 repository

{shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/MultiProc.py
@@ -36,7 +36,7 @@ from threading import Thread
 
 
  # load all files in a directory into memory
- def io_bound(thread_func, iterator, args, maxthreads=10, maxproc=None):
+ def io_bound(thread_func, iterator, args, maxproc=None, maxthreads=10):
      results = []
      # determine chunksize
      niterator = len(iterator)

@@ -67,7 +67,7 @@ def io_bound(thread_func, iterator, args, maxthreads=10, maxproc=None)
  def io_bound_process(thread_func, proc_iterator, args, maxthreads):
      results = []
      # create a thread pool
-     nthreads = len(nthreads)
+     nthreads = len(proc_iterator)
      nthreads = min(nthreads,maxthreads)
      if nthreads>0:
          with ThreadPoolExecutor(nthreads) as exe:
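
Two things are worth noting about the MultiProc.py hunks above. The old nthreads = len(nthreads) referenced the local variable on the right-hand side before it was ever assigned, so io_bound_process raised UnboundLocalError on every call; len(proc_iterator) makes it usable. The io_bound change, by contrast, is a silent behavior change for positional callers, since the fourth positional argument now binds to maxproc instead of maxthreads. A minimal caller-side sketch (the fetch worker and the urls list are hypothetical):

    from SharedData.MultiProc import io_bound

    def fetch(url, args):  # hypothetical worker function
        return url

    urls = ['u1', 'u2', 'u3']

    # Safe under both 2.0.30 and 2.0.32: keyword arguments bind by name
    io_bound(fetch, urls, (), maxthreads=4)

    # Behavior change: the 4th positional argument bound to maxthreads
    # in 2.0.30 but binds to maxproc in 2.0.32
    io_bound(fetch, urls, (), 4)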

{shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedData.py
@@ -174,5 +174,5 @@ class SharedData:
          for shm_name in shm_names.index:
              self.free(shm_name)
 
-     def SubscribeRealTime(self):
-         SharedDataRealTime.Subscribe(self)
+     def subscriberealtime(self):
+         SharedDataRealTime.subscribe(self)

{shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataPeriod.py
@@ -136,4 +136,8 @@ class SharedDataPeriod:
              return fpath.is_file()
          else:
              return False
-
+
+     def create_timeseries(self,tag,startDate,columns,overwrite=False):
+         self.tags[tag] = SharedDataTimeSeries(\
+             self,tag,startDate=startDate,columns=columns,overwrite=overwrite)
+         return self.tags[tag].data
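
The new create_timeseries method gives SharedDataPeriod an explicit creation path: it forwards startDate/columns/overwrite to the SharedDataTimeSeries constructor and returns the shared-memory-backed DataFrame directly. A minimal usage sketch (the SharedData constructor arguments and the feeder/period/tag keys are hypothetical):

    import pandas as pd
    from SharedData.SharedData import SharedData

    shdata = SharedData('MarketData')    # constructor args hypothetical
    period = shdata['QUOTES']['D1']      # a SharedDataPeriod; keys hypothetical
    close = period.create_timeseries('close',
        startDate=pd.Timestamp('2022-01-01'),
        columns=pd.Index(['AAPL', 'MSFT']))
    # the returned DataFrame is a view over shared memory, so
    # assignments are visible to other processes mapping the same tag
    close.loc[pd.Timestamp('2022-01-03'), 'AAPL'] = 182.01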

{shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataRealTime.py
@@ -11,7 +11,7 @@ class SharedDataRealTime:
      # producer dictionary
      producer = {}
 
-     def Broadcast(shdata,feeder,period,tag,idx,col):
+     def broadcast(shdata,feeder,period,tag,idx,col):
          producer = SharedDataRealTime.getProducer(shdata)
          data = shdata[feeder][period][tag].loc[idx,col].values
 

@@ -64,7 +64,7 @@ class SharedDataRealTime:
              KinesisStreamProducer(stream_name=streamname)
          return SharedDataRealTime.producer[shdata.database]
 
-     def Subscribe(shdata):
+     def subscribe(shdata):
          today = pd.Timestamp(pd.Timestamp.now().date())
          wd = shdata['WATCHDOG']['D1']
          dfwatchdog = wd[today]
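
Together with the SharedData.py hunk above, these renames (Broadcast to broadcast, Subscribe to subscribe, SubscribeRealTime to subscriberealtime) lowercase the real-time API, so 2.0.30 call sites raise AttributeError until updated. A before/after sketch (shdata, feeder, period, tag, idx and col are assumed to be defined by the caller):

    from SharedData.SharedDataRealTime import SharedDataRealTime

    # 2.0.30 call sites (now raise AttributeError):
    #   shdata.SubscribeRealTime()
    #   SharedDataRealTime.Broadcast(shdata, feeder, period, tag, idx, col)

    # 2.0.32 equivalents:
    shdata.subscriberealtime()
    SharedDataRealTime.broadcast(shdata, feeder, period, tag, idx, col)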

{shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataTable.py
@@ -104,12 +104,12 @@ class SharedDataTable:
          self.init_time = time.time() - self.init_time
 
      def ismalloc(self):
-         path, shm_name = self.get_path(iswrite=True)
+         path, shm_name = self.get_path()
          [self.shm, ismalloc] = self.sharedData.malloc(shm_name)
          return ismalloc
 
      def create(self, names, formats, size):
-         path, shm_name = self.get_path(iswrite=True)
+         path, shm_name = self.get_path()
          #TODO: WRITE FILE WITH POINTER TO SHARED DATA TO KEEP TRACK OF OPENED MEMORY
          check_pkey = True
          npkeys = len(self.keys.pkeycolumns)

@@ -400,7 +400,7 @@ class SharedDataTable:
          self.records.table = self
 
      def write(self):
-         path, shm_name = self.get_path(iswrite=True)
+         path, shm_name = self.get_path()
 
          try:
              self.acquire()

@@ -574,7 +574,7 @@ class SharedDataTable:
 
          return rec
 
-     def get_path(self, iswrite=False):
+     def get_path(self):
          shm_name = self.sharedData.user + '/' + self.sharedData.database + '/' \
              + self.sharedDataFeeder.feeder + '/' + self.dataset
          if os.name=='posix':

@@ -609,7 +609,7 @@ class SharedDataTable:
              raise Exception('Tried to release semaphore without acquire!')
 
      def free(self):
-         path, shm_name = self.get_path(iswrite=True)
+         path, shm_name = self.get_path()
          self.sharedData.free(shm_name)
          self.sharedData.free(shm_name+'#pkey')
          self.sharedData.free(shm_name+'#dateidx')

shareddata-2.0.30/src/SharedData/SharedDataTimeSeries.py → shareddata-2.0.32/src/SharedData/SharedDataTimeSeries copy.py
@@ -42,7 +42,7 @@ class SharedDataTimeSeries:
          self.columns = pd.Index([])
 
 
-         if value is None: #Read dataset tag
+         if value is None: #read dataset tag
              feeder = self.sharedDataFeeder.feeder
              dataset = sharedDataPeriod.dataset
              sharedData = sharedDataPeriod.sharedData

@@ -73,9 +73,9 @@ class SharedDataTimeSeries:
              self.ctimeidx = sharedDataPeriod.getContinousTimeIndex(self.startDate)
 
              #allocate memory
-             self.isCreate = self.Malloc()
+             self.isCreate = self.malloc()
              if self.isCreate:
-                 self.Read()
+                 self.read()
 
          else: # map existing dataframe
              self.startDate = value.index[0]

@@ -88,7 +88,7 @@ class SharedDataTimeSeries:
 
          self.ctimeidx = self.sharedDataPeriod.getContinousTimeIndex(self.startDate)
          #allocate memory
-         isCreate = self.Malloc(value=value)
+         isCreate = self.malloc(value=value)
 
          self.init_time = time.time() - self.init_time
 

@@ -192,14 +192,14 @@ class SharedDataTimeSeries:
              i=i+1
 
      # C R U D
-     def Malloc(self, value=None):
+     def malloc(self, value=None):
          tini=time.time()
 
          #Create write ndarray
          path, shm_name = self.getDataPath(iswrite=True)
 
          if os.environ['LOG_LEVEL']=='DEBUG':
-             Logger.log.debug('Malloc %s ...%.2f%% ' % (shm_name,0.0))
+             Logger.log.debug('malloc %s ...%.2f%% ' % (shm_name,0.0))
 
          try: # try create memory file
              r = len(self.index)

@@ -243,7 +243,7 @@
                  value = self.data
 
              if os.environ['LOG_LEVEL']=='DEBUG':
-                 Logger.log.debug('Malloc create %s ...%.2f%% %.2f sec! ' % \
+                 Logger.log.debug('malloc create %s ...%.2f%% %.2f sec! ' % \
                      (shm_name,100,time.time()-tini))
              self.create_map = 'create'
              return True

@@ -284,13 +284,13 @@
              self.data.loc[iidx, icol] = value.loc[iidx, icol]
 
          if os.environ['LOG_LEVEL']=='DEBUG':
-             Logger.log.debug('Malloc map %s/%s/%s ...%.2f%% %.2f sec! ' % \
+             Logger.log.debug('malloc map %s/%s/%s ...%.2f%% %.2f sec! ' % \
                  (self.feeder,self.period,self.tag,100,time.time()-tini))
          self.create_map = 'map'
          return False
 
      # READ
-     def Read(self):
+     def read(self):
          tini = time.time()
          path, shm_name = self.getDataPath()
          headpath = path / (self.tag+'_head.bin')

@@ -365,7 +365,7 @@
              data_io.close()
 
      # WRITE
-     def Write(self, startDate=None):
+     def write(self, startDate=None):
          firstdate = self.data.first_valid_index()
          if not startDate is None:
              firstdate = startDate

@@ -450,8 +450,8 @@
          os.utime(path, (mtime, mtime))
 
      # MESSAGES
-     def Broadcast(self,idx,col):
-         SharedDataRealTime.Broadcast(
+     def broadcast(self,idx,col):
+         SharedDataRealTime.broadcast(
              self.sharedData,
              self.feeder,
              self.period,

shareddata-2.0.32/src/SharedData/SharedDataTimeSeries.py
@@ -0,0 +1,572 @@
+ # THIRD PARTY LIBS
+ import os,sys
+ import pandas as pd
+ import numpy as np
+ import json
+ import time
+ from numba import jit
+ from pathlib import Path
+ from multiprocessing import shared_memory
+ import io, gzip, hashlib, shutil
+ from threading import Thread
+
+ from subprocess import run, PIPE
+ from datetime import datetime, timedelta
+
+ from SharedData.Logger import Logger
+ from SharedData.SharedDataAWSS3 import S3Upload,S3Download,UpdateModTime
+ from SharedData.SharedDataRealTime import SharedDataRealTime
+
+ class SharedDataTimeSeries:
+
+     def __init__(self, sharedDataPeriod, tag, value=None,\
+             startDate=None,columns=None,overwrite=False):
+         self.sharedDataPeriod = sharedDataPeriod
+         self.tag = tag
+
+         self.sharedDataFeeder = sharedDataPeriod.sharedDataFeeder
+         self.sharedData = sharedDataPeriod.sharedDataFeeder.sharedData
+
+         self.period = sharedDataPeriod.period
+         self.periodSeconds = sharedDataPeriod.periodSeconds
+         self.feeder = self.sharedDataFeeder.feeder
+
+         # test if shared memory already exists
+         if self.ismalloc():
+             self.create_map = 'map'
+         else:
+             self.create_map = 'create'
+
+         self.init_time = time.time()
+         self.download_time = pd.NaT
+         self.last_update = pd.NaT
+         self.first_update = pd.NaT
+
+         # Time series dataframe
+         self.data = pd.DataFrame()
+         self.index = pd.Index([])
+         self.columns = pd.Index([])
+
+         # initalize
+         try:
+             if ((self.create_map == 'create') | (overwrite)):
+                 if (not startDate is None):
+                     # create new empty shared memory
+                     self.startDate = startDate
+                     self.columns = columns
+                     self.malloc_create()
+
+                 elif (not value is None):
+                     # allocate existing data
+                     self.startDate = value.index[0]
+                     self.columns = value.columns
+                     self.malloc_create()
+                     self.setValues(value.index,value.columns,value.values)
+
+                 elif (value is None):
+                     # read & allocate data
+                     tini = time.time()
+                     datasize = self.read()
+                     te = time.time()-tini+0.000001
+                     Logger.log.debug('read %s/%s %.2fMB in %.2fs %.2fMBps ' % \
+                         (self.feeder,self.tag,datasize,te,datasize/te))
+
+             elif (self.create_map == 'map'):
+                 # map existing shared memory
+                 self.malloc_map()
+         except Exception as e:
+             path, shm_name = self.get_path()
+             Logger.log.error('Error initalizing %s!\n%s' % (shm_name,str(e)))
+             self.free()
+
+         self.init_time = time.time() - self.init_time
+
+     def get_path(self):
+         shm_name = self.sharedData.user + '/' + self.sharedData.database + '/' \
+             + self.sharedDataFeeder.feeder + '/' + self.period + '/' + self.tag
+         if os.name=='posix':
+             shm_name = shm_name.replace('/','\\')
+
+         path = Path(os.environ['DATABASE_FOLDER'])
+         path = path / self.sharedData.user
+         path = path / self.sharedData.database
+         path = path / self.sharedDataFeeder.feeder
+         path = path / self.period
+         path = path / self.tag
+         path = Path(str(path).replace('\\','/'))
+         if self.sharedData.save_local:
+             if not os.path.isdir(path):
+                 os.makedirs(path)
+
+         return path, shm_name
+
+     def ismalloc(self):
+         path, shm_name = self.get_path()
+         [self.shm, ismalloc] = self.sharedData.malloc(shm_name)
+         return ismalloc
+
+     def malloc_create(self):
+         path, shm_name = self.get_path()
+         self.symbolidx = {}
+         for i in range(len(self.columns)):
+             self.symbolidx[self.columns.values[i]] = i
+         self.index = self.sharedDataPeriod.getTimeIndex(self.startDate)
+         self.ctimeidx = self.sharedDataPeriod.getContinousTimeIndex(self.startDate)
+         try: # try create memory file
+             r = len(self.index)
+             c = len(self.columns)
+
+             idx_b = self.index.astype(np.int64).values.tobytes()
+             colscsv_b = str.encode(','.join(self.columns.values),\
+                 encoding='UTF-8',errors='ignore')
+             nb_idx = len(idx_b)
+             nb_cols = len(colscsv_b)
+             nb_data = int(r*c*8)
+             header_b = np.array([r,c,nb_idx,nb_cols,nb_data]).astype(np.int64).tobytes()
+             nb_header = len(header_b)
+
+             nb_buf = nb_header+nb_idx+nb_cols+nb_data
+             nb_offset = nb_header+nb_idx+nb_cols
+
+             [self.shm, ismalloc] = self.sharedData.malloc(shm_name,create=True,size=nb_buf)
+
+             i=0
+             self.shm.buf[i:nb_header] = header_b
+             i = i + nb_header
+             self.shm.buf[i:i+nb_idx] = idx_b
+             i = i + nb_idx
+             self.shm.buf[i:i+nb_cols] = colscsv_b
+
+             self.shmarr = np.ndarray((r,c),\
+                 dtype=np.float64, buffer=self.shm.buf, offset=nb_offset)
+
+             self.shmarr[:] = np.nan
+
+             self.data = pd.DataFrame(self.shmarr,\
+                 index=self.index,\
+                 columns=self.columns,\
+                 copy=False)
+
+             return True
+         except Exception as e:
+             Logger.log.error('Failed to malloc_create\n%s' % str(e))
+             return False
+
+     def malloc_map(self):
+         try: # try map memory file
+             path, shm_name = self.get_path()
+             [self.shm, ismalloc] = self.sharedData.malloc(shm_name)
+
+             i=0
+             nb_header=40
+             header = np.frombuffer(self.shm.buf[i:nb_header],dtype=np.int64)
+             i = i + nb_header
+             nb_idx = header[2]
+             idx_b = bytes(self.shm.buf[i:i+nb_idx])
+             self.index = pd.to_datetime(np.frombuffer(idx_b,dtype=np.int64))
+             i = i + nb_idx
+             nb_cols = header[3]
+             cols_b = bytes(self.shm.buf[i:i+nb_cols])
+             self.columns = cols_b.decode(encoding='UTF-8',errors='ignore').split(',')
+
+             r = header[0]
+             c = header[1]
+             nb_data = header[4]
+             nb_offset = nb_header+nb_idx+nb_cols
+
+             self.shmarr = np.ndarray((r,c), dtype=np.float64,\
+                 buffer=self.shm.buf, offset=nb_offset)
+
+             self.data = pd.DataFrame(self.shmarr,\
+                 index=self.index,\
+                 columns=self.columns,\
+                 copy=False)
+
+             return True
+         except Exception as e:
+             Logger.log.error('Failed to malloc_map\n%s' % str(e))
+             return False
+
+     # READ
+     def read(self):
+         datasize = 0
+         path, shm_name = self.get_path()
+         headpath = path / (self.tag+'_head.bin')
+         tailpath = path / (self.tag+'_tail.bin')
+         head_io = None
+         tail_io = None
+         if self.sharedData.s3read:
+             force_download= (not self.sharedData.save_local)
+
+             [head_io_gzip, head_local_mtime, head_remote_mtime] = \
+                 S3Download(str(headpath),str(headpath)+'.gzip',force_download)
+             if not head_io_gzip is None:
+                 head_io = io.BytesIO()
+                 head_io_gzip.seek(0)
+                 with gzip.GzipFile(fileobj=head_io_gzip, mode='rb') as gz:
+                     shutil.copyfileobj(gz,head_io)
+                 if self.sharedData.save_local:
+                     SharedDataTimeSeries.write_file(head_io,headpath,mtime=head_remote_mtime)
+                     UpdateModTime(headpath,head_remote_mtime)
+
+
+             [tail_io_gzip, tail_local_mtime, tail_remote_mtime] = \
+                 S3Download(str(tailpath),str(tailpath)+'.gzip',force_download)
+             if not tail_io_gzip is None:
+                 tail_io = io.BytesIO()
+                 tail_io_gzip.seek(0)
+                 with gzip.GzipFile(fileobj=tail_io_gzip, mode='rb') as gz:
+                     shutil.copyfileobj(gz,tail_io)
+                 if self.sharedData.save_local:
+                     SharedDataTimeSeries.write_file(tail_io,tailpath,mtime=tail_remote_mtime)
+                     UpdateModTime(tailpath,tail_remote_mtime)
+
+         if (head_io is None) & (self.sharedData.save_local):
+             # read local
+             if os.path.isfile(str(headpath)):
+                 head_io = open(str(headpath),'rb')
+
+         if (tail_io is None) & (self.sharedData.save_local):
+             if os.path.isfile(str(tailpath)):
+                 tail_io = open(str(tailpath),'rb')
+
+         self.startDate = None
+         self.columns = pd.Index([])
+         # read index, columns
+         if not head_io is None:
+             head_io.seek(0)
+             [index,columns] = self.read_header(head_io)
+             self.columns = self.columns.union(columns)
+             self.startDate = pd.Timestamp(index.values[0])
+
+         if not tail_io is None:
+             tail_io.seek(0)
+             [index,columns] = self.read_header(tail_io)
+             self.columns = self.columns.union(columns)
+             if self.startDate==None:
+                 self.startDate = pd.Timestamp(index.values[0])
+
+         if not self.startDate is None:
+             self.malloc_create()
+             # read data
+             if not head_io is None:
+                 head_io.seek(0)
+                 datasize+=self.read_data(head_io,headpath)
+                 head_io.close()
+
+             if not tail_io is None:
+                 tail_io.seek(0)
+                 datasize+=self.read_data(tail_io,tailpath)
+                 tail_io.close()
+
+         return datasize
+
+     def read_header(self,data_io):
+         _header = np.frombuffer(data_io.read(40),dtype=np.int64)
+         _idx_b = data_io.read(int(_header[2]))
+         _idx = pd.to_datetime(np.frombuffer(_idx_b,dtype=np.int64))
+         _colscsv_b = data_io.read(int(_header[3]))
+         _colscsv = _colscsv_b.decode(encoding='UTF-8',errors='ignore')
+         _cols = _colscsv.split(',')
+         return [_idx,_cols]
+
+     def read_data(self,data_io,path):
+         _header = np.frombuffer(data_io.read(40),dtype=np.int64)
+         _idx_b = data_io.read(int(_header[2]))
+         _idx = pd.to_datetime(np.frombuffer(_idx_b,dtype=np.int64))
+         _colscsv_b = data_io.read(int(_header[3]))
+         _colscsv = _colscsv_b.decode(encoding='UTF-8',errors='ignore')
+         _cols = _colscsv.split(',')
+         _data = np.frombuffer(data_io.read(int(_header[4])),dtype=np.float64).reshape((_header[0],_header[1]))
+         #calculate hash
+         _m = hashlib.md5(_idx_b)
+         _m.update(_colscsv_b)
+         _m.update(_data)
+         _md5hash_b = _m.digest()
+         __md5hash_b = data_io.read(16)
+         if not _md5hash_b==__md5hash_b:
+             raise Exception('Timeseries file corrupted!\n%s' % (path))
+         sidx = np.array([self.get_loc_symbol(s) for s in _cols])
+         ts = _idx.values.astype(np.int64)/10**9 #seconds
+         tidx = self.get_loc_timestamp(ts)
+         self.setValuesJit(self.data.values,tidx,sidx,_data)
+         data_io.close()
+         return _header[4]
+
+     # WRITE
+     def write(self, startDate=None):
+         firstdate = self.data.first_valid_index()
+         if not startDate is None:
+             firstdate = startDate
+         self.write_partitions(firstdate)
+
+     def write_partitions(self,firstdate):
+         tini = time.time()
+         path, shm_name = self.get_path()
+
+         partdate = pd.Timestamp(datetime(datetime.now().year,1,1))
+         threads = []
+
+         mtime = datetime.now().timestamp()
+         if firstdate<partdate:
+             # write head
+             threads = [*threads , \
+                 Thread(target=SharedDataTimeSeries.write_timeseries_df,\
+                 args=(self,self.data.loc[:partdate], str(path / (self.tag+'_head.bin')), mtime) )]
+         # write tail
+         threads = [*threads , \
+             Thread(target=SharedDataTimeSeries.write_timeseries_df,\
+             args=(self,self.data.loc[partdate:], str(path / (self.tag+'_tail.bin')), mtime) )]
+
+         for i in range(len(threads)):
+             threads[i].start()
+
+         for i in range(len(threads)):
+             threads[i].join()
+
+     def write_timeseries_df(self,df,tag_path,mtime):
+         ts_io = SharedDataTimeSeries.create_timeseries_io(df)
+         threads=[]
+         if self.sharedData.s3write:
+             ts_io.seek(0)
+             gzip_io = io.BytesIO()
+             with gzip.GzipFile(fileobj=gzip_io, mode='wb', compresslevel=1) as gz:
+                 shutil.copyfileobj(ts_io, gz)
+
+             threads = [*threads , \
+                 Thread(target=S3Upload,args=(gzip_io, tag_path+'.gzip', mtime) )]
+
+         if self.sharedData.save_local:
+             threads = [*threads , \
+                 Thread(target=SharedDataTimeSeries.write_file, args=(ts_io, tag_path, mtime) )]
+
+         for i in range(len(threads)):
+             threads[i].start()
+
+         for i in range(len(threads)):
+             threads[i].join()
+
+     def create_timeseries_io(df):
+         df = df.dropna(how='all',axis=0).dropna(how='all',axis=1)
+         r, c = df.shape
+         idx = (df.index.astype(np.int64))
+         idx_b = idx.values.tobytes()
+         cols = df.columns.values
+         colscsv = ','.join(cols)
+         colscsv_b = str.encode(colscsv,encoding='UTF-8',errors='ignore')
+         nbidx = len(idx_b)
+         nbcols = len(colscsv_b)
+         data = np.ascontiguousarray(df.values.astype(np.float64))
+         header = np.array([r,c,nbidx,nbcols,r*c*8]).astype(np.int64)
+         #calculate hash
+         m = hashlib.md5(idx_b)
+         m.update(colscsv_b)
+         m.update(data)
+         md5hash_b = m.digest()
+         # allocate memory
+         io_obj = io.BytesIO()
+         io_obj.write(header)
+         io_obj.write(idx_b)
+         io_obj.write(colscsv_b)
+         io_obj.write(data)
+         io_obj.write(md5hash_b)
+         return io_obj
+
+     def write_file(io_obj,path,mtime):
+         with open(path, 'wb') as f:
+             f.write(io_obj.getbuffer())
+             f.flush()
+         os.utime(path, (mtime, mtime))
+
+     # MESSAGES
+     def broadcast(self,idx,col):
+         SharedDataRealTime.broadcast(
+             self.sharedData,
+             self.feeder,
+             self.period,
+             self.tag,
+             idx,col)
+
+     # get / set
+     def get_loc_symbol(self, symbol):
+         if symbol in self.symbolidx.keys():
+             return self.symbolidx[symbol]
+         else:
+             return np.nan
+
+     def get_loc_timestamp(self, ts):
+         istartdate = self.startDate.timestamp() #seconds
+         if not np.isscalar(ts):
+             tidx = self.get_loc_timestamp_Jit(ts, istartdate, \
+                 self.periodSeconds, self.ctimeidx)
+             return tidx
+         else:
+             tids = np.int64(ts) #seconds
+             tids = np.int64(tids - istartdate)
+             tids = np.int64(tids/self.periodSeconds)
+             if tids<self.ctimeidx.shape[0]:
+                 tidx = self.ctimeidx[tids]
+                 return tidx
+             else:
+                 return np.nan
+
+     @staticmethod
+     @jit(nopython=True, nogil=True, cache=True)
+     def get_loc_timestamp_Jit(ts, istartdate, periodSeconds, ctimeidx):
+         tidx = np.empty(ts.shape, dtype=np.float64)
+         len_ctimeidx = len(ctimeidx)
+         for i in range(len(tidx)):
+             tid = np.int64(ts[i])
+             tid = np.int64(tid-istartdate)
+             tid = np.int64(tid/periodSeconds)
+             if tid < len_ctimeidx:
+                 tidx[i] = ctimeidx[tid]
+             else:
+                 tidx[i] = np.nan
+         return tidx
+
+     def getValue(self,ts,symbol):
+         sidx = self.get_loc_symbol(symbol)
+         tidx = self.get_loc_timestamp(ts)
+         if (not np.isnan(sidx)) & (not np.isnan(tidx)):
+             return self.data.values[np.int64(tidx),int(sidx)]
+         else:
+             return np.nan
+
+     def setValue(self,ts,symbol,value):
+         sidx = self.get_loc_symbol(symbol)
+         tidx = self.get_loc_timestamp(ts)
+         if (not np.isnan(sidx)) & (not np.isnan(tidx)):
+             self.data.values[np.int64(tidx),int(sidx)] = value
+
+     def setValues(self,ts,symbol,values):
+         sidx = self.get_loc_symbol(symbol)
+         tidx = self.get_loc_timestamp(ts)
+         self.setValuesSymbolJit(self.data.values, tidx, sidx, values)
+
+     @staticmethod
+     @jit(nopython=True, nogil=True, cache=True)
+     def setValuesSymbolJit(values,tidx,sidx,arr):
+         if not np.isnan(sidx):
+             s = np.int64(sidx)
+             i = 0
+             for t in tidx:
+                 if not np.isnan(t):
+                     values[np.int64(t),s] = arr[i]
+                 i=i+1
+
+     @staticmethod
+     @jit(nopython=True, nogil=True, cache=True)
+     def setValuesJit(values,tidx,sidx,arr):
+         i = 0
+         for t in tidx:
+             if not np.isnan(t):
+                 j = 0
+                 for s in sidx:
+                     if not np.isnan(s):
+                         values[np.int64(t),np.int64(s)] = arr[i,j]
+                     j=j+1
+             i=i+1
+
+
+     # C R U D
+     def malloc(self, value=None):
+         tini=time.time()
+
+         #Create write ndarray
+         path, shm_name = self.get_path()
+
+         if os.environ['LOG_LEVEL']=='DEBUG':
+             Logger.log.debug('malloc %s ...%.2f%% ' % (shm_name,0.0))
+
+         try: # try create memory file
+             r = len(self.index)
+             c = len(self.columns)
+
+             idx_b = self.index.astype(np.int64).values.tobytes()
+             colscsv_b = str.encode(','.join(self.columns.values),\
+                 encoding='UTF-8',errors='ignore')
+             nb_idx = len(idx_b)
+             nb_cols = len(colscsv_b)
+             nb_data = int(r*c*8)
+             header_b = np.array([r,c,nb_idx,nb_cols,nb_data]).astype(np.int64).tobytes()
+             nb_header = len(header_b)
+
+             nb_buf = nb_header+nb_idx+nb_cols+nb_data
+             nb_offset = nb_header+nb_idx+nb_cols
+
+             [self.shm, ismalloc] = self.sharedData.malloc(shm_name,create=True,size=nb_buf)
+
+             i=0
+             self.shm.buf[i:nb_header] = header_b
+             i = i + nb_header
+             self.shm.buf[i:i+nb_idx] = idx_b
+             i = i + nb_idx
+             self.shm.buf[i:i+nb_cols] = colscsv_b
+
+             self.shmarr = np.ndarray((r,c),\
+                 dtype=np.float64, buffer=self.shm.buf, offset=nb_offset)
+
+             if not value is None:
+                 self.shmarr[:] = value.values.copy()
+             else:
+                 self.shmarr[:] = np.nan
+
+             self.data = pd.DataFrame(self.shmarr,\
+                 index=self.index,\
+                 columns=self.columns,\
+                 copy=False)
+
+             if not value is None:
+                 value = self.data
+
+             if os.environ['LOG_LEVEL']=='DEBUG':
+                 Logger.log.debug('malloc create %s ...%.2f%% %.2f sec! ' % \
+                     (shm_name,100,time.time()-tini))
+             self.create_map = 'create'
+             return True
+         except Exception as e:
+             pass
+
+         # map memory file
+         [self.shm, ismalloc] = self.sharedData.malloc(shm_name)
+
+         i=0
+         nb_header=40
+         header = np.frombuffer(self.shm.buf[i:nb_header],dtype=np.int64)
+         i = i + nb_header
+         nb_idx = header[2]
+         idx_b = bytes(self.shm.buf[i:i+nb_idx])
+         self.index = pd.to_datetime(np.frombuffer(idx_b,dtype=np.int64))
+         i = i + nb_idx
+         nb_cols = header[3]
+         cols_b = bytes(self.shm.buf[i:i+nb_cols])
+         self.columns = cols_b.decode(encoding='UTF-8',errors='ignore').split(',')
+
+         r = header[0]
+         c = header[1]
+         nb_data = header[4]
+         nb_offset = nb_header+nb_idx+nb_cols
+
+         self.shmarr = np.ndarray((r,c), dtype=np.float64,\
+             buffer=self.shm.buf, offset=nb_offset)
+
+         self.data = pd.DataFrame(self.shmarr,\
+             index=self.index,\
+             columns=self.columns,\
+             copy=False)
+
+         if not value is None:
+             iidx = value.index.intersection(self.data.index)
+             icol = value.columns.intersection(self.data.columns)
+             self.data.loc[iidx, icol] = value.loc[iidx, icol]
+
+         if os.environ['LOG_LEVEL']=='DEBUG':
+             Logger.log.debug('malloc map %s/%s/%s ...%.2f%% %.2f sec! ' % \
+                 (self.feeder,self.period,self.tag,100,time.time()-tini))
+         self.create_map = 'map'
+         return False
+
+     def free(self):
+         path, shm_name = self.get_path()
+         self.sharedData.free(shm_name)
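
For orientation, create_timeseries_io above fixes the on-disk layout of the head/tail partitions: a 40-byte header of five int64 fields (rows, cols, index bytes, columns-csv bytes, data bytes), the int64 nanosecond timestamps of the index, a UTF-8 comma-separated column list, the float64 row-major data block, and a trailing 16-byte MD5 over index, columns and data. A minimal standalone reader sketch mirroring read_data (the helper name and file path are hypothetical):

    import hashlib
    import numpy as np
    import pandas as pd

    def read_timeseries_bin(path):  # hypothetical helper
        with open(path, 'rb') as f:
            r, c, nb_idx, nb_cols, nb_data = np.frombuffer(f.read(40), dtype=np.int64)
            idx_b = f.read(int(nb_idx))      # int64 ns-since-epoch index
            cols_b = f.read(int(nb_cols))    # comma-separated column names
            data_b = f.read(int(nb_data))    # float64 row-major values
            md5_stored = f.read(16)          # integrity trailer
        m = hashlib.md5(idx_b)
        m.update(cols_b)
        m.update(data_b)
        if m.digest() != md5_stored:
            raise Exception('Timeseries file corrupted!\n%s' % path)
        index = pd.to_datetime(np.frombuffer(idx_b, dtype=np.int64))
        columns = cols_b.decode('UTF-8').split(',')
        data = np.frombuffer(data_b, dtype=np.float64).reshape((int(r), int(c)))
        return pd.DataFrame(data, index=index, columns=columns)

    df = read_timeseries_bin('close_tail.bin')  # hypothetical path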

{shareddata-2.0.30 → shareddata-2.0.32}/src/shareddata.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: shareddata
- Version: 2.0.30
+ Version: 2.0.32
  Summary: Shared Memory Database with S3 repository
  Home-page: https://github.com/jcarlitooliveira/SharedData
  Author: Jose Carlito de Oliveira Filho

{shareddata-2.0.30 → shareddata-2.0.32}/src/shareddata.egg-info/SOURCES.txt
@@ -19,6 +19,7 @@ src/SharedData/SharedDataRealTimeProcess.py
  src/SharedData/SharedDataTable.py
  src/SharedData/SharedDataTableIndex.py
  src/SharedData/SharedDataTableIndexJit.py
+ src/SharedData/SharedDataTimeSeries copy.py
  src/SharedData/SharedDataTimeSeries.py
  src/SharedData/SharedNumpy.py
  src/SharedData/Utils.py