xy-scale 1.0.3 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  This repository provides utilities for scaling and preparing datasets in JavaScript, with a primary focus on data preprocessing for machine learning applications. The main functionality includes scaling numerical and categorical data and splitting datasets into training and testing sets.
7
7
 
8
- The primary functions, `parseTrainingDataset` and `parseProductionDataset`, offer a flexible and modular approach to data handling, allowing users to define custom scaling approaches, weighting of features, and specific parsing rules for features and labels.
8
+ The primary functions, `parseTrainingXY` and `parseProductionX`, offer a flexible and modular approach to data handling, allowing users to define custom scaling approaches, weighting of X, and specific parsing rules for X and Y.
9
9
 
10
10
  ---
11
11
 
@@ -17,60 +17,62 @@ The primary functions, `parseTrainingDataset` and `parseProductionDataset`, offe
17
17
 
18
18
  ## Main Functions
19
19
 
20
- ### 1. `parseTrainingDataset`
20
+ ### 1. `parseTrainingXY`
21
21
 
22
22
  This function prepares a dataset for supervised learning by parsing, scaling, and splitting it into training and testing subsets. It includes configurable options for feature weighting and scaling approaches.
23
23
 
24
24
  #### Parameters:
25
- - `arrObj` (Array of Objects): Input data array containing all features and labels.
25
+ - `arrObj` (Array of Objects): Input data array containing all X and Y.
26
26
  - `trainingSplit` (Number, optional): Defines the training dataset size (default `0.8`).
27
27
  - `weights` (Object, optional): Feature weights for scaling.
28
- - `parseLabels` (Function): Custom function to parse labels for each object.
29
- - `parseFeatures` (Function): Custom function to parse features for each object.
28
+ - `yCallbackFunc` (Function): Custom function to parse Y for each object.
29
+ - `xCallbackFunc` (Function): Custom function to parse X for each object.
30
30
  - `forceScaling` (String, optional): Forces a specific scaling approach for each feature.
31
+ - `timeSteps` (Number, optional): Transforms a one-dimensional array into an array of overlapping sequences (timesteps), each of a specified length. Defaults to `0`, which returns the original output unchanged.
31
32
 
32
33
  #### Features:
33
- - **Label and Feature Parsing**: Custom parsing for labels and features based on user-defined functions.
34
- - **Configurable Scaling and Splitting**: Scales features and labels independently and splits data into training and testing sets.
34
+ - **Y and X Parsing**: Custom parsing for Y and X based on user-defined functions.
35
+ - **Configurable Scaling and Splitting**: Scales X and Y independently and splits data into training and testing sets.
35
36
 
36
37
  #### Scaling Approaches:
37
38
  - **Normalization**: Scales values to a range of `[0, 1]`.
38
39
  - **Standardization**: Scales values to have a mean of `0` and standard deviation of `1`.
39
40
  - **Automatic Selection (Default)**: If `forceScaling = null`, the function automatically selects between `'normalization'` and `'standardization'` for each feature.
40
- - **Normalization** is chosen for features with lower variance (small difference between mean and standard deviation), scaling values to a `[0, 1]` range.
41
+ - **Normalization** is chosen for X with lower variance (small difference between mean and standard deviation), scaling values to a `[0, 1]` range.
41
42
  - **Standardization** is applied when higher variance is detected (large difference between mean and standard deviation), centering values with a mean of `0` and a standard deviation of `1`.
42
43
 
43
44
  This adaptive scaling approach ensures the most effective transformation is applied based on each feature's statistical properties.
44
45
 
45
46
  #### Returns:
46
- - `trainFeatures`, `trainLabels`, `testFeatures`, `testLabels`: Scaled feature and label arrays for training and testing sets.
47
- - `trainFeaturesConfig`, `trainLabelsConfig`: Scaling configuration for features and labels.
48
- - `trainFeaturesKeyNames`, `trainLabelKeyNames`: Key names reflecting feature weights.
47
+ - `trainX`, `trainY`, `testX`, `testY`: Scaled feature and label arrays for training and testing sets.
48
+ - `trainXConfig`, `trainYConfig`: Scaling configuration for X and Y.
49
+ - `trainXKeyNames`, `trainYKeyNames`: Key names reflecting feature weights.
49
50
 
50
- ### 2. `parseProductionDataset`
51
+ ### 2. `parseProductionX`
51
52
 
52
- Designed for production environments, this function parses and scales feature data for unseen production datasets. Like `parseTrainingDataset`, it includes options for feature weighting and scaling.
53
+ Designed for production environments, this function parses and scales feature data for unseen production datasets. Like `parseTrainingXY`, it includes options for feature weighting and scaling.
53
54
 
54
55
  #### Parameters:
55
56
  - `arrObj` (Array of Objects): Input data array for production.
56
57
  - `weights` (Object, optional): Feature weights for scaling.
57
- - `parseFeatures` (Function): Custom function to parse features for each object.
58
+ - `xCallbackFunc` (Function): Custom function to parse X for each object.
58
59
  - `forceScaling` (String, optional): Forces a specific scaling approach for each feature.
60
+ - `timeSteps` (Number, optional): Transforms a one-dimensional array into an array of overlapping sequences (timesteps), each of a specified length. Defaults to `0`, which returns the original output unchanged.
59
61
 
60
62
  #### Returns:
61
- - `productionFeatures`: Scaled feature array for production data.
62
- - `productionFeaturesConfig`: Scaling configuration for production data.
63
- - `productionFeaturesKeyNames`: Key names reflecting feature weights.
63
+ - `x`: Scaled feature array for production data.
64
+ - `xConfig`: Scaling configuration for production data.
65
+ - `xKeyNames`: Key names reflecting feature weights.
64
66
 
65
67
  ## Helper Callback Functions for Custom Data Parsing
66
68
 
67
- ### `parseFeatures`
69
+ ### `xCallbackFunc`
68
70
 
69
- The `parseFeatures` function is used to extract specific feature values from each row of data, defining what the model will use as input. By selecting relevant fields in the dataset, `parseFeatures` ensures only the necessary values are included in the model’s feature set, allowing for streamlined preprocessing and improved model performance.
71
+ The `xCallbackFunc` function is used to extract specific feature values from each row of data, defining what the model will use as input. By selecting relevant fields in the dataset, `xCallbackFunc` ensures only the necessary values are included in the model’s feature set, allowing for streamlined preprocessing and improved model performance.
70
72
 
71
- ### `parseLabels`
73
+ ### `yCallbackFunc`
72
74
 
73
- The `parseLabels` function defines the target output (or labels) that the machine learning model will learn to predict. This function typically creates labels by comparing each row of data with a future data point, which is especially useful in time-series data for predictive tasks. In our example, `parseLabels` generates labels based on changes between the current and next rows, which can help the model learn to predict directional trends.
75
+ The `yCallbackFunc` function defines the target output (or Y) that the machine learning model will learn to predict. This function typically creates Y by comparing each row of data with a future data point, which is especially useful in time-series data for predictive tasks. In our example, `yCallbackFunc` generates Y based on changes between the current and next rows, which can help the model learn to predict directional trends.
74
76
 
75
77
 
76
78
  ---
@@ -79,8 +81,8 @@ The `parseLabels` function defines the target output (or labels) that the machin
79
81
 
80
82
  1. **Parsing and Splitting a Training Dataset:**
81
83
 
82
- ```javascript
83
- import { parseTrainingDataset } from './scale.js';
84
+ ```javascript
85
+ import { parseTrainingXY } from './scale.js';
84
86
 
85
87
  const myArray = [
86
88
  { open: 135.23, high: 137.45, low: 134.56, sma_200: 125.34, sma_100: 130.56 },
@@ -88,7 +90,7 @@ The `parseLabels` function defines the target output (or labels) that the machin
88
90
  { open: 137.89, high: 139.34, low: 136.34, sma_200: 127.56, sma_100: 132.78 }
89
91
  ];
90
92
 
91
- const parseFeatures = ({ objRow, index }) => {
93
+ const xCallbackFunc = ({ objRow, index }) => {
92
94
  const curr = objRow[index];
93
95
  const { open, high, low, sma_200, sma_100 } = curr;
94
96
 
@@ -101,7 +103,7 @@ The `parseLabels` function defines the target output (or labels) that the machin
101
103
  };
102
104
  };
103
105
 
104
- const parseLabels = ({ objRow, index }) => {
106
+ const yCallbackFunc = ({ objRow, index }) => {
105
107
  const curr = objRow[index];
106
108
  const next = objRow[index + 1];
107
109
 
@@ -116,22 +118,28 @@ The `parseLabels` function defines the target output (or labels) that the machin
116
118
  };
117
119
  };
118
120
 
119
- const trainingData = parseTrainingDataset({
121
+ const trainingData = parseTrainingXY({
120
122
  arrObj: myArray,
121
123
  trainingSplit: 0.75,
122
124
  weights: { open: 1, high: 1, low: 1, sma_200: 1, sma_100: 1 },
123
- parseLabels,
124
- parseFeatures,
125
- forceScaling: 'normalization'
125
+ yCallbackFunc,
126
+ xCallbackFunc,
127
+ forceScaling: 'normalization',
128
+ timeSteps: 0
126
129
  });
127
- ```
130
+ ```
131
+
132
+ **Output:**
133
+ ```json
134
+ {"trainX":[[0,0,0,0,0]],"trainY":[[0,0,0,0,0]],"testX":[[1,1,1,1,1]],"testY":[[0,0,0,0,0]],"trainXConfig":{"min":{"open":135.23,"high":137.45,"low":134.56,"sma_200":125.34,"sma_100":130.56},"max":{"open":136.45,"high":138.67,"low":135.67,"sma_200":126.78,"sma_100":131.45},"std":{"open":0.8626702730475972,"high":0.8626702730475772,"low":0.7848885271170473,"sma_200":1.0182337649086268,"sma_100":0.6293250352560177},"mean":{"open":135.83999999999997,"high":138.06,"low":135.115,"sma_200":126.06,"sma_100":131.005},"approach":{"open":"normalization","high":"normalization","low":"normalization","sma_200":"normalization","sma_100":"normalization"},"inputTypes":{"open":"number","high":"number","low":"number","sma_200":"number","sma_100":"number"},"uniqueStringIndexes":{}},"trainXKeyNames":["open","high","low","sma_200","sma_100"],"trainYConfig":{"min":{"label_1":true,"label_2":true,"label_3":true,"label_4":true,"label_5":true},"max":{"label_1":true,"label_2":true,"label_3":true,"label_4":true,"label_5":true},"std":{"label_1":0,"label_2":0,"label_3":0,"label_4":0,"label_5":0},"mean":{"label_1":1,"label_2":1,"label_3":1,"label_4":1,"label_5":1},"approach":{"label_1":"normalization","label_2":"normalization","label_3":"normalization","label_4":"normalization","label_5":"normalization"},"inputTypes":{"label_1":"boolean","label_2":"boolean","label_3":"boolean","label_4":"boolean","label_5":"boolean"},"uniqueStringIndexes":{}},"trainYKeyNames":["label_1","label_2","label_3","label_4","label_5"]}
135
+ ```
128
136
 
129
137
  2. **Parsing a Production Dataset:**
130
138
 
131
- ```javascript
132
- import { parseProductionDataset } from './scale.js';
139
+ ```javascript
140
+ import { parseProductionX } from './scale.js';
133
141
 
134
- const parseFeatures = ({ objRow, index }) => {
142
+ const xCallbackFunc = ({ objRow, index }) => {
135
143
  const curr = objRow[index];
136
144
  const { open, high, low, sma_200, sma_100 } = curr;
137
145
 
@@ -144,41 +152,61 @@ The `parseLabels` function defines the target output (or labels) that the machin
144
152
  };
145
153
  };
146
154
 
147
- const productionData = parseProductionDataset({
148
- arrObj: productionArray,
155
+ const myArray = [
156
+ { open: 135.23, high: 137.45, low: 134.56, sma_200: 125.34, sma_100: 130.56 },
157
+ { open: 136.45, high: 138.67, low: 135.67, sma_200: 126.78, sma_100: 131.45 },
158
+ { open: 137.89, high: 139.34, low: 136.34, sma_200: 127.56, sma_100: 132.78 }
159
+ ];
160
+
161
+ const productionData = parseProductionX({
162
+ arrObj: myArray,
149
163
  weights: { open: 2, high: 1, low: 1, sma_200: 1, sma_100: 1 },
150
- parseFeatures: (row) => row.features,
151
- forceScaling: null
164
+ xCallbackFunc,
165
+ forceScaling: null,
166
+ timeSteps: 0
152
167
  });
153
- ```
168
+ ```
169
+
170
+ **Output:**
171
+
172
+ ```json
173
+ {"x":[[-0.9713243322194223,-0.9713243322194223,0,0,-1.0832575234857975,-0.9278787875246485],[-0.05507509100212526,-0.05507509100212526,0.6455026455026398,0.6235955056179688,0.19534152062858562,-0.1312754554697336],[1.026399423221569,1.026399423221569,1,1,0.887916002857212,1.059154242994382]],"xConfig":{"min":{"open":135.23,"high":137.45,"low":134.56,"sma_200":125.34,"sma_100":130.56},"max":{"open":137.89,"high":139.34,"low":136.34,"sma_200":127.56,"sma_100":132.78},"std":{"open":1.3315154273733958,"high":0.9582449234581516,"low":0.899017982764145,"sma_200":1.1262326580240862,"sma_100":1.1172436320397328},"mean":{"open":136.5233333333333,"high":138.48666666666668,"low":135.52333333333334,"sma_200":126.56,"sma_100":131.59666666666666},"approach":{"open":"standardization","high":"normalization","low":"normalization","sma_200":"standardization","sma_100":"standardization"},"inputTypes":{"open":"number","high":"number","low":"number","sma_200":"number","sma_100":"number"},"uniqueStringIndexes":{}},"xKeyNames":["open","open","high","low","sma_200","sma_100"]}
174
+ ```
154
175
 
155
176
  ---
156
177
 
157
178
  ### Upcoming Feature: Optional Precision Handling with Big.js and BigNumber.js
158
179
 
159
- In the next release, we are introducing an optional **precision** feature to enhance decimal precision in financial and scientific datasets. This feature will allow users to integrate **Big.js** or **BigNumber.js** libraries seamlessly into their data processing workflow by adding a new `precision` property to the parameters of `parseTrainingDataset` and `parseProductionDataset`.
180
+ In the next release, we are introducing an optional **precision** feature to enhance decimal precision in financial and scientific datasets. This feature will allow users to integrate **Big.js** or **BigNumber.js** libraries seamlessly into their data processing workflow by adding a new `precision` property to the parameters of `parseTrainingXY` and `parseProductionX`.
160
181
 
161
182
  #### How Precision Handling Will Work
162
183
 
163
184
  With the new `precision` property, users can pass either Big.js or BigNumber.js as callback functions to handle high-precision decimal calculations. This makes the integration fully optional, allowing flexibility based on the precision requirements of the dataset. When `precision` is set, the toolkit will use the specified library for all numeric computations, ensuring high precision and minimizing rounding errors.
164
185
 
165
- **Future Example Usage:**
186
+ 1. **Future Example Usage:**
166
187
 
167
- ```javascript
188
+ ```javascript
168
189
  import Big from 'big.js';
169
- import BigNumber from "bignumber.js";
170
- import { parseTrainingDataset, parseProductionDataset } from './scale.js';
190
+ import BigNumber from 'bignumber.js';
191
+ import { parseTrainingXY, parseProductionX } from './scale.js';
192
+
193
+ const myArray = [
194
+ { open: 135.23, high: 137.45, low: 134.56, sma_200: 125.34, sma_100: 130.56 },
195
+ { open: 136.45, high: 138.67, low: 135.67, sma_200: 126.78, sma_100: 131.45 },
196
+ { open: 137.89, high: 139.34, low: 136.34, sma_200: 127.56, sma_100: 132.78 }
197
+ ];
171
198
 
172
- const trainingData = parseTrainingDataset({
199
+ const trainingData = parseTrainingXY({
173
200
  arrObj: myArray,
174
201
  trainingSplit: 0.75,
175
202
  weights: { open: 1, high: 1, low: 1, sma_200: 1, sma_100: 1 },
176
- parseLabels,
177
- parseFeatures,
178
- precision: Big, // Big or BigNumber for high-precision calculations
179
- forceScaling: 'normalization'
203
+ yCallbackFunc,
204
+ xCallbackFunc,
205
+ precision: Big, // Big or BigNumber callbacks for high-precision calculations
206
+ forceScaling: 'normalization',
207
+ timeSteps: 0
180
208
  });
181
- ```
209
+ ```
182
210
 
183
211
  ---
184
212
 
@@ -1 +1 @@
1
- var XY_Scale;(()=>{"use strict";var e={d:(t,r)=>{for(var n in r)e.o(r,n)&&!e.o(t,n)&&Object.defineProperty(t,n,{enumerable:!0,get:r[n]})},o:(e,t)=>Object.prototype.hasOwnProperty.call(e,t),r:e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})}},t={};e.r(t),e.d(t,{parseProductionDataset:()=>o,parseTrainingDataset:()=>n});const r=({arrObj:e,weights:t={},forceScaling:r=null})=>{if(null!==r&&"normalization"!==r&&"standardization"!==r)throw Error('forceScalling should be null, "normalization" or "standardization"');const n=e.length;if(0===n)return{scaledOutput:[],scaledConfig:{},keyNames:[]};const o=Object.keys(e[0]),a=o.map((e=>{if(t.hasOwnProperty(e)){const r=t[e];if(r<=0)throw new Error(`Weight for key "${e}" must be positive.`);return r}return 1})),s=a.reduce(((e,t)=>e+t),0),i=new Array(s);let l=0;for(let e=0;e<o.length;e++){const t=o[e],r=a[e];for(let e=0;e<r;e++)i[l++]=t}const c={},u={},f={},d={},g={},p={},y={},b={};for(const t of o){const r=e[0][t];c[t]=typeof r,"string"===c[t]&&(y[t]={}),u[t]=1/0,f[t]=-1/0,d[t]=0,g[t]=0,b[t]=0}for(const t of e)for(const e of o){let r=t[e];if("string"===c[e]){const n=y[e];n.hasOwnProperty(r)||(n[r]=Object.keys(n).length),r=n[r],t[e]=r}r<u[e]&&(u[e]=r),r>f[e]&&(f[e]=r),b[e]++;const n=r-d[e];d[e]+=n/b[e],g[e]+=n*(r-d[e])}const h={};for(const e of o)h[e]=b[e]>1?Math.sqrt(g[e]/(b[e]-1)):0,p[e]="normalization"===r||"standardization"===r?r:h[e]<1?"normalization":"standardization";const m=new Array(n);for(let t=0;t<n;t++){const r=e[t],n=new Array(s);let i=0;for(let e=0;e<o.length;e++){const t=o[e],s=r[t],l=u[t],c=f[t],g=d[t],y=h[t];let b;b="normalization"===p[t]?c!==l?(s-l)/(c-l):0:0!==y?(s-g)/y:0;const m=a[e];for(let 
e=0;e<m;e++)n[i++]=b}m[t]=n}return{scaledOutput:m,scaledConfig:{min:u,max:f,std:h,mean:d,approach:p,inputTypes:c,uniqueStringIndexes:y},scaledKeyNames:i}},n=({arrObj:e,trainingSplit:t=.8,weights:n={},parseLabels:o,parseFeatures:a,forceScaling:s})=>{const i=[],l=[];for(let t=0;t<e.length;t++){const r=a({objRow:e,index:t}),n=o({objRow:e,index:t});r&&n&&(i.push(r),l.push(n))}const{scaledOutput:c,scaledConfig:u,scaledKeyNames:f}=r({arrObj:i,weights:n,forceScaling:s}),{scaledOutput:d,scaledConfig:g,scaledKeyNames:p}=r({arrObj:l,weights:n,forceScaling:s}),y=Math.floor(c.length*t);return{trainFeatures:c.slice(0,y),trainLabels:d.slice(0,y),testFeatures:c.slice(y),testLabels:d.slice(y),trainFeaturesConfig:u,trainFeaturesKeyNames:f,trainLabelsConfig:g,trainLabelKeyNames:p}},o=({arrObj:e,weights:t={},parseFeatures:n,forceScaling:o})=>{const a=[];for(let t=0;t<e.length;t++){const r=n({objRow:e,index:t});r&&a.push(r)}const{scaledOutput:s,scaledConfig:i,scaledKeyNames:l}=r({arrObj:a,weights:t,forceScaling:o});return{productionFeatures:s,productionFeaturesConfig:i,productionFeaturesKeyNames:l}};XY_Scale=t})();
1
+ var XY_Scale;(()=>{"use strict";var e={d:(t,n)=>{for(var r in n)e.o(n,r)&&!e.o(t,r)&&Object.defineProperty(t,r,{enumerable:!0,get:n[r]})},o:(e,t)=>Object.prototype.hasOwnProperty.call(e,t),r:e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})}},t={};e.r(t),e.d(t,{parseProductionX:()=>a,parseTrainingXY:()=>o});const n=({arrObj:e,weights:t={},forceScaling:n=null})=>{if(null!==n&&"normalization"!==n&&"standardization"!==n)throw Error('forceScalling should be null, "normalization" or "standardization"');const r=e.length;if(0===r)return{scaledOutput:[],scaledConfig:{},keyNames:[]};const o=Object.keys(e[0]),a=o.map((e=>{if(t.hasOwnProperty(e)){const n=t[e];if(n<=0)throw new Error(`Weight for key "${e}" must be positive.`);return n}return 1})),s=a.reduce(((e,t)=>e+t),0),i=new Array(s);let l=0;for(let e=0;e<o.length;e++){const t=o[e],n=a[e];for(let e=0;e<n;e++)i[l++]=t}const c={},f={},u={},g={},d={},p={},h={},y={};for(const t of o){const n=e[0][t];c[t]=typeof n,"string"===c[t]&&(h[t]={}),f[t]=1/0,u[t]=-1/0,g[t]=0,d[t]=0,y[t]=0}for(const t of e)for(const e of o){let n=t[e];if("string"===c[e]){const r=h[e];r.hasOwnProperty(n)||(r[n]=Object.keys(r).length),n=r[n],t[e]=n}n<f[e]&&(f[e]=n),n>u[e]&&(u[e]=n),y[e]++;const r=n-g[e];g[e]+=r/y[e],d[e]+=r*(n-g[e])}const m={};for(const e of o)m[e]=y[e]>1?Math.sqrt(d[e]/(y[e]-1)):0,p[e]="normalization"===n||"standardization"===n?n:m[e]<1?"normalization":"standardization";const b=new Array(r);for(let t=0;t<r;t++){const n=e[t],r=new Array(s);let i=0;for(let e=0;e<o.length;e++){const t=o[e],s=n[t],l=f[t],c=u[t],d=g[t],h=m[t];let y;y="normalization"===p[t]?c!==l?(s-l)/(c-l):0:0!==h?(s-d)/h:0;const b=a[e];for(let e=0;e<b;e++)r[i++]=y}b[t]=r}return{scaledOutput:b,scaledConfig:{min:f,max:u,std:m,mean:g,approach:p,inputTypes:c,uniqueStringIndexes:h},scaledKeyNames:i}},r=(e,t)=>{if(0===t)return e;if(t<0)throw new Error("timeSteps must 
be greater than 0");const n=[];for(let r=0;r<=e.length-t;r++)n.push(e.slice(r,r+t));return n},o=({arrObj:e,trainingSplit:t=.8,weights:o={},yCallbackFunc:a,xCallbackFunc:s,forceScaling:i,timeSteps:l=0})=>{const c=[],f=[];for(let t=0;t<e.length;t++){const n=s({objRow:e,index:t}),r=a({objRow:e,index:t});n&&r&&(c.push(n),f.push(r))}const{scaledOutput:u,scaledConfig:g,scaledKeyNames:d}=n({arrObj:c,weights:o,forceScaling:i}),{scaledOutput:p,scaledConfig:h,scaledKeyNames:y}=n({arrObj:f,weights:o,forceScaling:i}),m=Math.floor(u.length*t);return{trainX:r(u.slice(0,m),l),trainY:r(p.slice(0,m),l),testX:r(u.slice(m),l),testY:r(p.slice(m),l),trainXConfig:g,trainXKeyNames:d,trainYConfig:h,trainYKeyNames:y}},a=({arrObj:e,weights:t={},xCallbackFunc:o,forceScaling:a,timeSteps:s=0})=>{const i=[];for(let t=0;t<e.length;t++){const n=o({objRow:e,index:t});n&&i.push(n)}const{scaledOutput:l,scaledConfig:c,scaledKeyNames:f}=n({arrObj:i,weights:t,forceScaling:a});return{x:r(l,s),xConfig:c,xKeyNames:f}};XY_Scale=t})();
package/index.js CHANGED
@@ -1,3 +1,3 @@
1
- import { parseTrainingDataset, parseProductionDataset } from "./src/datasets.js"
1
+ import { parseTrainingXY, parseProductionX } from "./src/datasets.js"
2
2
 
3
- export { parseTrainingDataset, parseProductionDataset }
3
+ export { parseTrainingXY, parseProductionX }
package/package.json CHANGED
@@ -1,10 +1,11 @@
1
1
  {
2
2
  "name": "xy-scale",
3
- "version": "1.0.3",
3
+ "version": "1.0.7",
4
4
  "main": "./index.js",
5
5
  "type": "module",
6
6
  "scripts": {
7
- "build": "npx webpack"
7
+ "build": "npx webpack",
8
+ "test": "node test/test.js"
8
9
  },
9
10
  "author": "",
10
11
  "license": "ISC",
package/src/datasets.js CHANGED
@@ -1,75 +1,90 @@
1
1
  import { scaleArrayObj } from "./scale.js";
2
2
 
3
- export const parseTrainingDataset = ({ arrObj, trainingSplit = 0.8, weights = {}, parseLabels, parseFeatures, forceScaling }) => {
4
- const features = [];
5
- const labels = [];
3
+ const arrayToTimesteps = (arr, timeSteps) => {
4
+ if (timeSteps === 0) return arr;
5
+ if (timeSteps < 0) throw new Error("timeSteps must be greater than 0");
6
+
7
+ const timestepsArray = [];
8
+
9
+ for (let i = 0; i <= arr.length - timeSteps; i++) {
10
+ timestepsArray.push(arr.slice(i, i + timeSteps));
11
+ }
12
+
13
+ return timestepsArray;
14
+ }
15
+
16
+
17
+
18
+ export const parseTrainingXY = ({ arrObj, trainingSplit = 0.8, weights = {}, yCallbackFunc, xCallbackFunc, forceScaling, timeSteps = 0 }) => {
19
+ const X = [];
20
+ const Y = [];
6
21
 
7
22
  for (let x = 0; x < arrObj.length; x++) {
8
- const parsedFeatures = parseFeatures({ objRow: arrObj, index: x });
9
- const parsedLabels = parseLabels({ objRow: arrObj, index: x });
23
+ const parsedX = xCallbackFunc({ objRow: arrObj, index: x });
24
+ const parsedY = yCallbackFunc({ objRow: arrObj, index: x });
10
25
 
11
- if (parsedFeatures && parsedLabels) {
12
- features.push(parsedFeatures)
13
- labels.push(parsedLabels)
26
+ if (parsedX && parsedY) {
27
+ X.push(parsedX)
28
+ Y.push(parsedY)
14
29
  }
15
30
  }
16
31
 
17
- // Scale features and labels, if applicable
32
+ // Scale X and Y, if applicable
18
33
  const {
19
- scaledOutput: scaledFeatures,
20
- scaledConfig: trainFeaturesConfig,
21
- scaledKeyNames: trainFeaturesKeyNames
34
+ scaledOutput: scaledX,
35
+ scaledConfig: trainXConfig,
36
+ scaledKeyNames: trainXKeyNames
22
37
 
23
- } = scaleArrayObj({arrObj: features, weights, forceScaling})
38
+ } = scaleArrayObj({arrObj: X, weights, forceScaling})
24
39
 
25
40
  const {
26
- scaledOutput: scaledLabels,
27
- scaledConfig: trainLabelsConfig,
28
- scaledKeyNames: trainLabelKeyNames
29
- } = scaleArrayObj({arrObj: labels, weights, forceScaling})
41
+ scaledOutput: scaledY,
42
+ scaledConfig: trainYConfig,
43
+ scaledKeyNames: trainYKeyNames
44
+ } = scaleArrayObj({arrObj: Y, weights, forceScaling})
30
45
 
31
- const splitIndex = Math.floor(scaledFeatures.length * trainingSplit)
46
+ const splitIndex = Math.floor(scaledX.length * trainingSplit)
32
47
 
33
48
  // Split into training and testing sets
34
49
  return {
35
- trainFeatures: scaledFeatures.slice(0, splitIndex),
36
- trainLabels: scaledLabels.slice(0, splitIndex),
37
- testFeatures: scaledFeatures.slice(splitIndex),
38
- testLabels: scaledLabels.slice(splitIndex),
39
-
40
- trainFeaturesConfig,
41
- trainFeaturesKeyNames,
42
- trainLabelsConfig,
43
- trainLabelKeyNames
50
+ trainX: arrayToTimesteps(scaledX.slice(0, splitIndex), timeSteps),
51
+ trainY: arrayToTimesteps(scaledY.slice(0, splitIndex), timeSteps),
52
+ testX: arrayToTimesteps(scaledX.slice(splitIndex), timeSteps),
53
+ testY: arrayToTimesteps(scaledY.slice(splitIndex), timeSteps),
54
+
55
+ trainXConfig,
56
+ trainXKeyNames,
57
+ trainYConfig,
58
+ trainYKeyNames
44
59
  };
45
60
  };
46
61
 
47
62
 
48
- export const parseProductionDataset = ({ arrObj, weights = {}, parseFeatures, forceScaling }) => {
49
- const features = [];
63
+ export const parseProductionX = ({ arrObj, weights = {}, xCallbackFunc, forceScaling, timeSteps = 0 }) => {
64
+ const X = [];
50
65
 
51
66
  for (let x = 0; x < arrObj.length; x++) {
52
- const parsedFeatures = parseFeatures({ objRow: arrObj, index: x })
67
+ const parsedX = xCallbackFunc({ objRow: arrObj, index: x })
53
68
 
54
- if (parsedFeatures) {
55
- features.push(parsedFeatures)
69
+ if (parsedX) {
70
+ X.push(parsedX)
56
71
  }
57
72
  }
58
73
 
59
- // Scale features and labels, if applicable
60
- // Scale features and labels, if applicable
74
+ // Scale X and Y, if applicable
75
+ // Scale X and Y, if applicable
61
76
  const {
62
- scaledOutput: scaledFeatures,
63
- scaledConfig: productionFeaturesConfig,
64
- scaledKeyNames: productionFeaturesKeyNames
77
+ scaledOutput: scaledX,
78
+ scaledConfig: xConfig,
79
+ scaledKeyNames: xKeyNames
65
80
 
66
- } = scaleArrayObj({arrObj: features, weights, forceScaling})
81
+ } = scaleArrayObj({arrObj: X, weights, forceScaling})
67
82
 
68
83
 
69
84
  // Split into training and testing sets
70
85
  return {
71
- productionFeatures: scaledFeatures,
72
- productionFeaturesConfig,
73
- productionFeaturesKeyNames
86
+ x: arrayToTimesteps(scaledX, timeSteps),
87
+ xConfig,
88
+ xKeyNames
74
89
  }
75
90
  };
package/test/test.js ADDED
@@ -0,0 +1,66 @@
1
+ import { parseTrainingXY, parseProductionX } from "../src/datasets.js";
2
+
3
+
4
+ const test = () => {
5
+
6
+
7
+ const myArray = [
8
+ { open: 135.23, high: 137.45, low: 134.56, sma_200: 125.34, sma_100: 130.56 },
9
+ { open: 136.45, high: 138.67, low: 135.67, sma_200: 126.78, sma_100: 131.45 },
10
+ { open: 137.89, high: 139.34, low: 136.34, sma_200: 127.56, sma_100: 132.78 }
11
+ ];
12
+
13
+ const xCallbackFunc = ({ objRow, index }) => {
14
+ const curr = objRow[index];
15
+ const { open, high, low, sma_200, sma_100 } = curr;
16
+
17
+ return {
18
+ open,
19
+ high,
20
+ low,
21
+ sma_200,
22
+ sma_100
23
+ };
24
+ };
25
+
26
+ const yCallbackFunc = ({ objRow, index }) => {
27
+ const curr = objRow[index];
28
+ const next = objRow[index + 1];
29
+
30
+ if (typeof next === 'undefined') return null;
31
+
32
+ return {
33
+ label_1: next.open > curr.open, // Label indicating if the next open price is higher than the current
34
+ label_2: next.high > curr.high, // Label indicating if the next high price is higher than the current
35
+ label_3: next.low > curr.low, // Label indicating if the next low price is higher than the current
36
+ label_4: next.sma_200 > curr.sma_200, // Label indicating if the next 200-day SMA is higher than the current
37
+ label_5: next.sma_100 > curr.sma_100 // Label indicating if the next 100-day SMA is higher than the current
38
+ };
39
+ };
40
+
41
+ const trainingData = parseTrainingXY({
42
+ arrObj: myArray,
43
+ trainingSplit: 0.75,
44
+ weights: { open: 1, high: 1, low: 1, sma_200: 1, sma_100: 1 },
45
+ yCallbackFunc,
46
+ xCallbackFunc,
47
+ forceScaling: 'normalization',
48
+ timeSteps: 0
49
+ });
50
+
51
+ //console.log(JSON.stringify(trainingData))
52
+
53
+
54
+ const productionData = parseProductionX({
55
+ arrObj: myArray,
56
+ weights: { open: 2, high: 1, low: 1, sma_200: 1, sma_100: 1 },
57
+ xCallbackFunc,
58
+ forceScaling: null,
59
+ timeSteps: 0
60
+ })
61
+
62
+ console.log(JSON.stringify(productionData))
63
+
64
+ }
65
+
66
+ test()