nnpdf 4.1.0__py3-none-any.whl → 4.1.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (116)
  1. n3fit/backends/keras_backend/MetaModel.py +27 -26
  2. n3fit/backends/keras_backend/callbacks.py +16 -8
  3. n3fit/backends/keras_backend/internal_state.py +13 -2
  4. n3fit/backends/keras_backend/operations.py +26 -26
  5. n3fit/hyper_optimization/hyper_scan.py +3 -9
  6. n3fit/hyper_optimization/penalties.py +11 -8
  7. n3fit/hyper_optimization/rewards.py +65 -34
  8. n3fit/model_gen.py +344 -270
  9. n3fit/model_trainer.py +71 -105
  10. n3fit/performfit.py +2 -7
  11. n3fit/tests/regressions/quickcard_1.json +12 -28
  12. n3fit/tests/regressions/quickcard_3.json +12 -28
  13. n3fit/tests/regressions/quickcard_pol_1.json +10 -26
  14. n3fit/tests/regressions/quickcard_pol_3.json +9 -25
  15. n3fit/tests/regressions/quickcard_qed_1.json +11 -27
  16. n3fit/tests/regressions/quickcard_qed_3.json +11 -27
  17. n3fit/tests/test_hyperopt.py +6 -12
  18. n3fit/tests/test_layers.py +6 -6
  19. n3fit/tests/test_modelgen.py +73 -24
  20. n3fit/tests/test_multireplica.py +52 -16
  21. n3fit/tests/test_penalties.py +7 -8
  22. n3fit/tests/test_preprocessing.py +2 -2
  23. n3fit/tests/test_vpinterface.py +5 -10
  24. n3fit/vpinterface.py +88 -44
  25. {nnpdf-4.1.0.dist-info → nnpdf-4.1.1.dist-info}/METADATA +9 -3
  26. {nnpdf-4.1.0.dist-info → nnpdf-4.1.1.dist-info}/RECORD +105 -67
  27. {nnpdf-4.1.0.dist-info → nnpdf-4.1.1.dist-info}/WHEEL +1 -1
  28. nnpdf_data/_version.py +1 -1
  29. nnpdf_data/commondata/ATLAS_2JET_7TEV_R06/metadata.yaml +16 -5
  30. nnpdf_data/commondata/ATLAS_TTBAR_13P6TEV_TOT/data.yaml +2 -0
  31. nnpdf_data/commondata/ATLAS_TTBAR_13P6TEV_TOT/kinematics.yaml +13 -0
  32. nnpdf_data/commondata/ATLAS_TTBAR_13P6TEV_TOT/metadata.yaml +51 -0
  33. nnpdf_data/commondata/ATLAS_TTBAR_13P6TEV_TOT/uncertainties.yaml +17 -0
  34. nnpdf_data/commondata/ATLAS_TTBAR_5TEV_TOT/data.yaml +2 -0
  35. nnpdf_data/commondata/ATLAS_TTBAR_5TEV_TOT/kinematics.yaml +13 -0
  36. nnpdf_data/commondata/ATLAS_TTBAR_5TEV_TOT/metadata.yaml +52 -0
  37. nnpdf_data/commondata/ATLAS_TTBAR_5TEV_TOT/uncertainties.yaml +22 -0
  38. nnpdf_data/commondata/ATLAS_WPWM_13P6TEV_TOT/data.yaml +3 -0
  39. nnpdf_data/commondata/ATLAS_WPWM_13P6TEV_TOT/kinematics.yaml +17 -0
  40. nnpdf_data/commondata/ATLAS_WPWM_13P6TEV_TOT/metadata.yaml +57 -0
  41. nnpdf_data/commondata/ATLAS_WPWM_13P6TEV_TOT/uncertainties.yaml +8 -0
  42. nnpdf_data/commondata/ATLAS_Z0_13P6TEV_TOT/data.yaml +2 -0
  43. nnpdf_data/commondata/ATLAS_Z0_13P6TEV_TOT/kinematics.yaml +9 -0
  44. nnpdf_data/commondata/ATLAS_Z0_13P6TEV_TOT/metadata.yaml +54 -0
  45. nnpdf_data/commondata/ATLAS_Z0_13P6TEV_TOT/uncertainties.yaml +7 -0
  46. nnpdf_data/commondata/CMS_1JET_8TEV/metadata.yaml +7 -1
  47. nnpdf_data/commondata/CMS_2JET_7TEV/metadata.yaml +16 -19
  48. nnpdf_data/commondata/CMS_TTBAR_13P6TEV_TOT/data.yaml +2 -0
  49. nnpdf_data/commondata/CMS_TTBAR_13P6TEV_TOT/kinematics.yaml +13 -0
  50. nnpdf_data/commondata/CMS_TTBAR_13P6TEV_TOT/metadata.yaml +51 -0
  51. nnpdf_data/commondata/CMS_TTBAR_13P6TEV_TOT/uncertainties.yaml +12 -0
  52. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/data_d2Sig_dmttBar_dyttBar.yaml +17 -0
  53. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/data_dSig_dmttBar.yaml +8 -0
  54. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/data_dSig_dpTt.yaml +8 -0
  55. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/data_dSig_dyt.yaml +11 -0
  56. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/filter.py +260 -0
  57. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/kinematics_d2Sig_dmttBar_dyttBar.yaml +193 -0
  58. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/kinematics_dSig_dmttBar.yaml +57 -0
  59. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/kinematics_dSig_dpTt.yaml +57 -0
  60. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/kinematics_dSig_dyt.yaml +81 -0
  61. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/metadata.yaml +114 -0
  62. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/rawdata/mtt_abs_parton.yaml +828 -0
  63. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/rawdata/mttytt-abs_parton.yaml +1899 -0
  64. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/rawdata/ptt_abs_parton.yaml +828 -0
  65. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/rawdata/submission.yaml +47 -0
  66. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/rawdata/yt_abs_parton.yaml +1179 -0
  67. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/uncertainties_d2Sig_dmttBar_dyttBar.yaml +2282 -0
  68. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/uncertainties_dSig_dmttBar.yaml +1256 -0
  69. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/uncertainties_dSig_dpTt.yaml +1256 -0
  70. nnpdf_data/commondata/CMS_TTBAR_13TEV_2L_138FB-1_DIF/uncertainties_dSig_dyt.yaml +1598 -0
  71. nnpdf_data/commondata/CMS_TTBAR_13TEV_35P9FB-1_TOT/data.yaml +2 -0
  72. nnpdf_data/commondata/CMS_TTBAR_13TEV_35P9FB-1_TOT/kinematics.yaml +13 -0
  73. nnpdf_data/commondata/CMS_TTBAR_13TEV_35P9FB-1_TOT/metadata.yaml +51 -0
  74. nnpdf_data/commondata/CMS_TTBAR_13TEV_35P9FB-1_TOT/uncertainties.yaml +17 -0
  75. nnpdf_data/commondata/CMS_TTBAR_5TEV_TOT/metadata.yaml +1 -1
  76. nnpdf_data/commondata/NNPDF_POS_2P24GEV/metadata.yaml +60 -0
  77. nnpdf_data/commondata/dataset_names.yml +6 -1
  78. nnpdf_data/theory_cards/41000010.yaml +42 -0
  79. nnpdf_data/theory_cards/41000011.yaml +43 -0
  80. nnpdf_data/theory_cards/41000012.yaml +43 -0
  81. nnpdf_data/theory_cards/41000013.yaml +42 -0
  82. nnpdf_data/theory_cards/41000014.yaml +43 -0
  83. nnpdf_data/theory_cards/41000015.yaml +43 -0
  84. validphys/_version.py +1 -1
  85. validphys/config.py +30 -10
  86. validphys/convolution.py +37 -14
  87. validphys/coredata.py +15 -5
  88. validphys/covmats.py +9 -2
  89. validphys/dataplots.py +1 -1
  90. validphys/filters.py +17 -3
  91. validphys/fkparser.py +11 -1
  92. validphys/gridvalues.py +1 -0
  93. validphys/hessian2mc.py +5 -5
  94. validphys/lhaindex.py +5 -0
  95. validphys/loader.py +1 -1
  96. validphys/n3fit_data.py +107 -61
  97. validphys/nnprofile_default.yaml +2 -1
  98. validphys/pineparser.py +12 -2
  99. validphys/scripts/postfit.py +4 -4
  100. validphys/scripts/vp_pdfrename.py +8 -9
  101. validphys/tests/conftest.py +6 -2
  102. validphys/tests/test_hessian2mc.py +7 -5
  103. validphys/utils.py +1 -0
  104. n3fit/tests/regressions/quickcard_pol/filter.yml +0 -80
  105. n3fit/tests/regressions/quickcard_pol/nnfit/input/lockfile.yaml +0 -111
  106. n3fit/tests/regressions/quickcard_pol/nnfit/replica_1/quickcard_pol.exportgrid +0 -572
  107. n3fit/tests/regressions/quickcard_pol/nnfit/replica_1/quickcard_pol.json +0 -71
  108. n3fit/tests/regressions/quickcard_pol/nnfit/replica_3/quickcard_pol.exportgrid +0 -615
  109. n3fit/tests/regressions/quickcard_pol/nnfit/replica_3/quickcard_pol.json +0 -71
  110. n3fit/tests/regressions/weights.weights.h5 +0 -0
  111. n3fit/tests/regressions/weights_pol.weights.h5 +0 -0
  112. n3fit/tests/test +0 -1
  113. nnpdf_data/theory_cards/40000099.yaml +0 -41
  114. nnpdf_data/theory_cards/40000099.yml +0 -41
  115. {nnpdf-4.1.0.dist-info → nnpdf-4.1.1.dist-info}/entry_points.txt +0 -0
  116. {nnpdf-4.1.0.dist-info → nnpdf-4.1.1.dist-info/licenses}/LICENSE +0 -0
validphys/covmats.py CHANGED
@@ -125,7 +125,7 @@ def covmat_from_systematics(
 
 def dataset_inputs_covmat_from_systematics(
     dataset_inputs_loaded_cd_with_cuts,
-    data_input,
+    data_input=None,
     use_weights_in_covmat=True,
     norm_threshold=None,
     _list_of_central_values=None,
@@ -186,10 +186,16 @@ def dataset_inputs_covmat_from_systematics(
     special_corrs = []
     block_diags = []
     weights = []
+
     if _list_of_central_values is None:
         # want to just pass None to systematic_errors method
         _list_of_central_values = [None] * len(dataset_inputs_loaded_cd_with_cuts)
 
+    if data_input is None:
+        if use_weights_in_covmat:
+            raise ValueError("if use_weights_in_covmat=True, ``data_input`` cannot be empty")
+        data_input = [None] * len(dataset_inputs_loaded_cd_with_cuts)
+
     for cd, dsinp, central_values in zip(
         dataset_inputs_loaded_cd_with_cuts, data_input, _list_of_central_values
     ):
@@ -199,7 +205,8 @@ def dataset_inputs_covmat_from_systematics(
         else:
             sys_errors = cd.systematic_errors(central_values)
         stat_errors = cd.stat_errors.to_numpy()
-        weights.append(np.full_like(stat_errors, dsinp.weight))
+        if use_weights_in_covmat and dsinp is not None:
+            weights.append(np.full_like(stat_errors, dsinp.weight))
         # separate out the special uncertainties which can be correlated across
         # datasets
         is_intra_dataset_error = sys_errors.columns.isin(INTRA_DATASET_SYS_NAME)
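The guard above makes ``data_input`` optional, so the covariance matrix can be assembled without per-dataset weights. A minimal standalone sketch of the same pattern; ``build_weights`` and its arguments are illustrative, not the validphys API:

import numpy as np

def build_weights(stat_errors_list, data_input=None, use_weights=True):
    # data_input may only be omitted when weights are not used,
    # mirroring the ValueError guard in the diff above
    if data_input is None:
        if use_weights:
            raise ValueError("if use_weights=True, data_input cannot be empty")
        data_input = [None] * len(stat_errors_list)
    weights = []
    for stat_errors, dsinp in zip(stat_errors_list, data_input):
        if use_weights and dsinp is not None:
            weights.append(np.full_like(stat_errors, dsinp.weight))
    return weights

# with use_weights=False, no dataset-input objects are needed at all
assert build_weights([np.ones(3)], use_weights=False) == []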
validphys/dataplots.py CHANGED
@@ -1422,7 +1422,7 @@ def plot_xq2(
     ... {'dataset': 'CMS_Z0_7TEV_DIMUON_2D'},
     ... {'dataset': 'CMS_WPWM_8TEV_MUON_Y', 'variant': 'legacy'},
     ... {'dataset': 'CMS_Z0J_8TEV_PT-Y', 'cfac': ['NRM'], 'variant': 'legacy_10'},
-    ... {'dataset': 'CMS_2JET_7TEV_M12Y'},
+    ... {'dataset': 'CMS_2JET_7TEV_M12-Y', 'variant': 'legacy'},
     ... {'dataset': 'CMS_1JET_8TEV_PTY', 'variant': 'legacy'},
     ... {'dataset': 'CMS_TTBAR_7TEV_TOT_X-SEC', 'variant': 'legacy'},
     ... {'dataset': 'CMS_TTBAR_8TEV_TOT_X-SEC', 'variant': 'legacy'},
validphys/filters.py CHANGED
@@ -125,13 +125,18 @@ class FilterDefaults:
 class FilterRule:
     """
     Dataclass which carries the filter rule information.
+
     """
 
     dataset: str = None
     process_type: str = None
     rule: str = None
-    reason: str = None
-    local_variables: Mapping[str, Union[str, float]] = None
+    reason: str = dataclasses.field(
+        default=None, hash=False, compare=False
+    )  # Not relevant for hashing
+    local_variables: Mapping[str, Union[str, float]] = dataclasses.field(
+        default=None, hash=False
+    )  # Avoid hash issues with caching
     PTO: str = None
     FNS: str = None
     IC: str = None
@@ -163,9 +168,18 @@ def default_filter_rules_input():
     """
     Return a tuple of FilterRule objects.
     These are defined in ``filters.yaml`` in the ``validphys.cuts`` module.
+    Similarly to `parse_added_filter_rules`, this function checks if the rules
+    are unique, i.d. if there are no multiple rules for the same dataset of
+    process with the same rule (`reason` and `local_variables` are not hashed).
     """
     list_rules = yaml_safe.load(read_text(validphys.cuts, "filters.yaml"))
-    return tuple(FilterRule(**rule) for rule in list_rules)
+    unique_rules = set(FilterRule(**rule) for rule in list_rules)
+    if len(unique_rules) != len(list_rules):
+        raise RuleProcessingError(
+            "Detected repeated filter rules. Please, make sure that "
+            " rules are not repeated in `filters.yaml`."
+        )
+    return tuple(unique_rules)
 
 
 def check_nonnegative(var: str):
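The ``dataclasses.field(hash=False, compare=False)`` change is what enables the new set-based duplicate check: rules that differ only in ``reason`` or ``local_variables`` now hash (and compare) equal. A reduced, hypothetical example of the mechanism; ``MiniRule`` is not the real FilterRule:

import dataclasses

@dataclasses.dataclass(frozen=True)
class MiniRule:
    dataset: str = None
    rule: str = None
    # excluded from __hash__ and __eq__, like ``reason`` above
    reason: str = dataclasses.field(default=None, hash=False, compare=False)

a = MiniRule(dataset="DS1", rule="x > 0.1", reason="kinematic cut")
b = MiniRule(dataset="DS1", rule="x > 0.1", reason="different wording")
assert a == b and hash(a) == hash(b)  # treated as the same rule
assert len({a, b}) == 1               # so the duplicate check fires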
validphys/fkparser.py CHANGED
@@ -18,6 +18,8 @@ CFactors applied.
     res = load_fktable(fk)
 """
 
+# TODO: this module is deprecated and support for older theories is not guaranteed
+
 import dataclasses
 import functools
 import io
@@ -313,9 +315,17 @@ def parse_fktable(f):
         hadronic = res['GridInfo'].hadronic
         ndata = res['GridInfo'].ndata
         xgrid = res.pop('xGrid')
+        data_idx = sigma.index.get_level_values("data").unique().to_series()
 
         return FKTableData(
-            sigma=sigma, ndata=ndata, Q0=Q0, metadata=res, hadronic=hadronic, xgrid=xgrid
+            sigma=sigma,
+            ndata=ndata,
+            Q0=Q0,
+            metadata=res,
+            hadronic=hadronic,
+            xgrid=xgrid,
+            data_index=data_idx,
+            legacy=True,
         )
     elif header_name in _KNOWN_SEGMENTS:
         parser = _KNOWN_SEGMENTS[header_name]
validphys/gridvalues.py CHANGED
@@ -6,6 +6,7 @@ LHAPDF. The tools for representing these grids are in pdfgrids.py
 (the validphys provider module), and the
 basis transformations are in pdfbases.py
 """
+
 import itertools
 
 import numpy as np
validphys/hessian2mc.py CHANGED
@@ -6,14 +6,14 @@ like MSHT20 and CT18 to Monte Carlo sets.
 The functions implemented here follow equations (4.3) of the paper arXiv:2203.05506
 """
 
-import pathlib
-import lhapdf
-import os
 import logging
+import os
+
 import numpy as np
 
-from validphys.lhio import load_all_replicas, rep_matrix, write_replica
 from validphys.checks import check_pdf_is_hessian
+from validphys.lhaindex import get_lha_datapath
+from validphys.lhio import load_all_replicas, rep_matrix, write_replica
 
 log = logging.getLogger(__name__)
 
@@ -108,7 +108,7 @@ def write_hessian_to_mc_watt_thorne(pdf, mc_pdf_name, num_members, watt_thorne_r
     """
     hessian_set = pdf
 
-    lhapdf_path = pathlib.Path(lhapdf.paths()[-1])
+    lhapdf_path = get_lha_datapath()
 
     # path to hessian lhapdf set
     hessian_pdf_path = lhapdf_path / str(hessian_set)
validphys/lhaindex.py CHANGED
@@ -150,3 +150,8 @@ def get_index_path(folder=None):
         folder = get_lha_datapath()
     index_file = os.path.join(folder, 'pdfsets.index')
     return index_file
+
+
+def paths_prepend(new_path):
+    """Prepend a path to the LHAPDF list of paths so that it takes precedence."""
+    lhapdf.pathsPrepend(new_path.as_posix())
validphys/loader.py CHANGED
@@ -1168,7 +1168,7 @@ class RemoteLoader(LoaderBase):
         raise PDFNotFound("PDF '%s' is neither an uploaded fit nor an " "LHAPDF set." % name)
 
     def download_theoryID(self, thid):
-        thid = str(thid)
+        thid = str(int(thid))
         remote = self.remote_theories
         if thid not in remote:
            raise TheoryNotFound("Theory %s not available." % thid)
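The extra ``int`` cast normalizes the theory id before the remote lookup, presumably so that a float id (e.g. read from a YAML runcard) still maps onto the integer key used by the remote index; this reading is an assumption, the diff itself only shows the cast:

# hypothetical illustration, not validphys code
assert str(700.0) == "700.0"                   # old behaviour: key misses the index
assert str(int(700.0)) == str(700) == "700"    # new behaviour: normalized key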
validphys/n3fit_data.py CHANGED
@@ -22,6 +22,14 @@ from validphys.n3fit_data_utils import validphys_group_extractor
 log = logging.getLogger(__name__)
 
 
+class Hashrray(TupleComp):
+    """Wrapper class to hash a numpy array so it can be cached."""
+
+    def __init__(self, array):
+        self.array = array
+        super().__init__(hash(self.array.tobytes()))
+
+
 def _per_replica(f):
     """Decorator to be used on top of reportengine's decorators.
     It replaces the preparation step of the decorator with a custom function,
@@ -153,7 +161,7 @@ class _Masks(TupleComp):
         super().__init__(group_name, seed)
 
 
-def _diagonal_masks(
+def diagonal_masks(
     data, replica_trvlseed, dataset_inputs_fitting_covmat, diagonal_frac=1.0, threshold_eigvals=0
 ):
 
@@ -187,7 +195,7 @@ def _diagonal_masks(
     )
 
 
-def _standard_masks(data, replica_trvlseed):
+def standard_masks(data, replica_trvlseed):
     """Generate the boolean masks used to split data into training and
     validation points. Returns a list of 1-D boolean arrays, one for each
     dataset. Each array has length equal to N_data, the datapoints which
@@ -203,6 +211,7 @@ def _standard_masks(data, replica_trvlseed):
 
     trmask_partial = []
     vlmask_partial = []
+    nomasking = True
     for dataset in data.datasets:
         # TODO: python commondata will not require this rubbish.
         # all data if cuts are None
@@ -214,6 +223,8 @@ def _standard_masks(data, replica_trvlseed):
             continue
 
         frac = dataset.frac
+        # nomasking turns to False as soon as one frac is not equal to 1
+        nomasking &= frac == 1.0
         # We do this so that a given dataset will always have the same number of points masked
         trmax = int(ndata * frac)
         if trmax == 0:
@@ -224,6 +235,9 @@ def _standard_masks(data, replica_trvlseed):
         vl_mask = ~tr_mask
         trmask_partial.append(tr_mask)
         vlmask_partial.append(vl_mask)
+    # if we are not masking, remove the seed from the object
+    if nomasking:
+        replica_trvlseed = None
     return _Masks(str(data), replica_trvlseed, trmask_partial, vlmask_partial)
 
 
@@ -304,65 +318,24 @@ def fittable_datasets_masked(data):
     return validphys_group_extractor(data.datasets)
 
 
-def fitting_data_dict(
-    data,
-    make_replica,
-    dataset_inputs_loaded_cd_with_cuts,
-    dataset_inputs_fitting_covmat,
-    masks,
-    kfold_masks,
-    fittable_datasets_masked,
-    diagonal_basis=False,
-):
-    """
-    Provider which takes the information from validphys ``data``.
+def _hashed_dataset_inputs_fitting_covmat(dataset_inputs_fitting_covmat) -> Hashrray:
+    """Wrap the covmat into a Hashrray for caches to work"""
+    return Hashrray(dataset_inputs_fitting_covmat)
 
-    Returns
-    -------
-    all_dict_out: dict
-        Containing all the information of the experiment/dataset
-        for training, validation and experimental With the following keys:
 
-        'datasets'
-            list of dictionaries for each of the datasets contained in ``data``
-        'name'
-            name of the ``data`` - typically experiment/group name
-        'expdata_true'
-            non-replica data
-        'covmat'
-            full covmat
-        'invcovmat_true'
-            inverse of the covmat (non-replica)
-        'trmask'
-            mask for the training data
-        'invcovmat'
-            inverse of the covmat for the training data
-        'ndata'
-            number of datapoints for the training data
-        'expdata'
-            experimental data (replica'd) for training
-        'vlmask'
-            (same as above for validation)
-        'invcovmat_vl'
-            (same as above for validation)
-        'ndata_vl'
-            (same as above for validation)
-        'expdata_vl'
-            (same as above for validation)
-        'positivity'
-            bool - is this a positivity set?
-        'count_chi2'
-            should this be counted towards the chi2
-    """
-    # TODO: Plug in the python data loading when available. Including but not
-    # limited to: central values, ndata, replica generation, covmat construction
-    expdata_true = np.concatenate([d.central_values for d in dataset_inputs_loaded_cd_with_cuts])
-    expdata = make_replica
+@functools.lru_cache
+def _inv_covmat_prepared(masks, _hashed_dataset_inputs_fitting_covmat, diagonal_basis=False):
+    """Returns the inverse covmats for training, validation and total
+    attending to the right masks and whether it is diagonal or not.
 
-    covmat = dataset_inputs_fitting_covmat  # t0 covmat, or theory covmat or whatever was decided by the runcard
-    # TODO: use cholesky decomposition to get the inverse of the covariance matrix
-    inv_true = np.linalg.inv(covmat)
-    fittable_datasets = fittable_datasets_masked
+    Since the masks and number of datapoints need to be treated for 1-point datasets
+    it also returns the right ndata and masks for training and validation:
+
+    inv_total, inv_training, inv_validation, ndata_tr, ndata_vl, mask_tr, mask_vl, diagonal_rotation
+    """
+    covmat = _hashed_dataset_inputs_fitting_covmat.array
+    inv_total = np.linalg.inv(covmat)
+    diagonal_rotation = None
 
     if diagonal_basis:
         log.info("working in diagonal basis.")
@@ -372,7 +345,6 @@ def fitting_data_dict(
 
     # rotate the experimental data to the diagonal basis of the cormat and obtain training/validation masks
     diagonal_rotation = masks.diagonal_rotation
-    expdata = diagonal_rotation @ expdata
     tr_mask = masks.tr_masks[0]
     vl_mask = masks.vl_masks[0]
 
@@ -455,6 +427,80 @@ def fitting_data_dict(
     ndata_tr -= len(data_zero_tr)
     ndata_vl -= len(data_zero_vl)
 
+    return (
+        inv_total,
+        invcovmat_tr,
+        invcovmat_vl,
+        ndata_tr,
+        ndata_vl,
+        tr_mask,
+        vl_mask,
+        diagonal_rotation,
+    )
+
+
+def fitting_data_dict(
+    data,
+    make_replica,
+    dataset_inputs_loaded_cd_with_cuts,
+    dataset_inputs_fitting_covmat,
+    _inv_covmat_prepared,
+    kfold_masks,
+    fittable_datasets_masked,
+):
+    """
+    Provider which takes the information from validphys ``data``.
+
+    Returns
+    -------
+    all_dict_out: dict
+        Containing all the information of the experiment/dataset
+        for training, validation and experimental With the following keys:
+
+        'datasets'
+            list of dictionaries for each of the datasets contained in ``data``
+        'name'
+            name of the ``data`` - typically experiment/group name
+        'expdata_true'
+            non-replica data
+        'covmat'
+            full covmat
+        'invcovmat_true'
+            inverse of the covmat (non-replica)
+        'trmask'
+            mask for the training data
+        'invcovmat'
+            inverse of the covmat for the training data
+        'ndata'
+            number of datapoints for the training data
+        'expdata'
+            experimental data (replica'd) for training
+        'vlmask'
+            (same as above for validation)
+        'invcovmat_vl'
+            (same as above for validation)
+        'ndata_vl'
+            (same as above for validation)
+        'expdata_vl'
+            (same as above for validation)
+        'positivity'
+            bool - is this a positivity set?
+        'count_chi2'
+            should this be counted towards the chi2
+    """
+    # TODO: Plug in the python data loading when available. Including but not
+    # limited to: central values, ndata, replica generation, covmat construction
+    expdata_true = np.concatenate([d.central_values for d in dataset_inputs_loaded_cd_with_cuts])
+    expdata = make_replica
+    fittable_datasets = fittable_datasets_masked
+
+    inv_true, invcovmat_tr, invcovmat_vl, ndata_tr, ndata_vl, tr_mask, vl_mask, diag_rot = (
+        _inv_covmat_prepared
+    )
+
+    if diag_rot is not None:
+        expdata = diag_rot @ expdata
+
     expdata_tr = expdata[tr_mask].reshape(1, -1)
     expdata_vl = expdata[vl_mask].reshape(1, -1)
 
@@ -477,7 +523,7 @@ def fitting_data_dict(
         "name": str(data),
         "expdata_true": expdata_true.reshape(1, -1),
         "invcovmat_true": inv_true,
-        "covmat": covmat,
+        "covmat": dataset_inputs_fitting_covmat,
         "trmask": tr_mask,
         "invcovmat": invcovmat_tr,
         "ndata": ndata_tr,
@@ -489,7 +535,7 @@ def fitting_data_dict(
         "positivity": False,
         "count_chi2": True,
         "folds": folds,
-        "data_transformation": diagonal_rotation if diagonal_basis else None,
+        "data_transformation": diag_rot,
     }
     return dict_out
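The new ``Hashrray`` wrapper plus ``functools.lru_cache`` on ``_inv_covmat_prepared`` means the covmat inversion runs once per distinct matrix rather than once per replica: numpy arrays are unhashable, so they cannot be cache keys directly. A reduced, hypothetical sketch of the pattern (the real class derives from reportengine's TupleComp):

import functools
import numpy as np

class MiniHashrray:
    """Make a numpy array usable as a cache key via its raw bytes."""

    def __init__(self, array):
        self.array = array
        self._hash = hash(array.tobytes())

    def __hash__(self):
        return self._hash

    def __eq__(self, other):
        return hash(self) == hash(other)

@functools.lru_cache
def inverse(hashed_covmat):
    # the expensive inversion now runs once per distinct covmat
    return np.linalg.inv(hashed_covmat.array)

cov = np.eye(3) * 2.0
assert inverse(MiniHashrray(cov)) is inverse(MiniHashrray(cov.copy()))  # cache hit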
 
validphys/nnprofile_default.yaml CHANGED
@@ -37,7 +37,6 @@ ekos_path: ekos
 # Remote resource locations
 fit_urls:
   - 'https://data.nnpdf.science/fits/'
-  - 'https://nnpdf.web.cern.ch/nnpdf/fits/'
 
 fit_index: 'fitdata.json'
 
@@ -47,12 +46,14 @@ hyperscan_urls:
 hyperscan_index: 'hyperscandata.json'
 
 theory_urls:
+  - 'https://nnpdf.nikhef.nl/nnpdf/theories/'
   - 'https://nnpdf.web.cern.ch/nnpdf/tables/'
   - 'https://nnpdf.web.cern.ch/nnpdf/tables_box/'
 
 theory_index: 'theorydata.json'
 
 eko_urls:
+  - 'https://nnpdf.nikhef.nl/nnpdf/ekos/'
   - 'https://nnpdf.web.cern.ch/nnpdf/ekos/'
   - 'https://nnpdf.web.cern.ch/nnpdf/ekos_box/'
 
validphys/pineparser.py CHANGED
@@ -220,6 +220,7 @@ def pineappl_reader(fkspec):
 
     partial_fktables = []
     ndata = 0
+    full_data_index = []
     for fkname, p in zip(fknames, pines):
         # Start by reading possible cfactors if cfactor is not empty
         cfprod = 1.0
@@ -267,6 +268,7 @@ def pineappl_reader(fkspec):
         partial_fktables.append(pd.DataFrame(df_fktable, columns=lumi_columns, index=idx))
 
         ndata += n
+        full_data_index.append(data_idx)
 
     # Finallly concatenate all fktables, sort by flavours and fill any holes
     sigma = pd.concat(partial_fktables, sort=True, copy=False).fillna(0.0)
@@ -285,8 +287,15 @@ def pineappl_reader(fkspec):
         ndata = 1
 
     if ndata == 1:
-        # There's no doubt
-        protected = divisor == name
+        # When the number of points is 1 and the fktable is a divisor, protect it from cuts
+        if divisor == name:
+            protected = True
+            full_data_index = [[0]]
+
+    # Keeping the data index as a series is exploited to speed up convolutions
+    # see e.g., convolution.py::_gv_hadron_predictions
+    fid = np.concatenate(full_data_index)
+    data_index = pd.Series(fid, index=fid, name="data")
 
     return FKTableData(
         sigma=sigma,
@@ -297,4 +306,5 @@ def pineappl_reader(fkspec):
         hadronic=hadronic,
         xgrid=xgrid,
         protected=protected,
+        data_index=data_index,
     )
validphys/scripts/postfit.py CHANGED
@@ -22,12 +22,12 @@ import re
 import shutil
 import sys
 
-import lhapdf
-
 from reportengine import colors
 from validphys import fitdata, fitveto, lhio
 from validphys.core import PDF
 from validphys.fitveto import INTEG_THRESHOLD, NSIGMA_DISCARD_ARCLENGTH, NSIGMA_DISCARD_CHI2
+from validphys.lhaindex import paths_prepend
+from validphys.lhapdf_compatibility import make_pdf
 from validphys.loader import Loader
 from validphys.utils import tempfile_cleaner
 
@@ -218,13 +218,13 @@ def _postfit(
     log.info("Beginning construction of replica 0")
     # It's important that this is prepended, so that any existing instance of
     # `fitname` is not read from some other path
-    lhapdf.pathsPrepend(str(postfit_path))
+    paths_prepend(postfit_path)
     generatingPDF = PDF(fitname)
     lhio.generate_replica0(generatingPDF)
 
     # Test replica 0
     try:
-        lhapdf.mkPDF(fitname, 0)
+        make_pdf(fitname, 0)
     except RuntimeError as e:
         raise PostfitError("CRITICAL ERROR: Failure in reading replica zero") from e
 
validphys/scripts/vp_pdfrename.py CHANGED
@@ -1,12 +1,12 @@
 #!/usr/bin/env python
 """
-    vp-pdfrename - command line tool to rename LHAPDFs
+vp-pdfrename - command line tool to rename LHAPDFs
 
-    To obtain the PDF from an fit, simply run
-    vp-pdfrename <path-to-fit> <PDF name>. Optional flags allow for the
-    resulting pdf to be placed in the LHAPDF directory, as well as modifying
-    various fields of the info file. In addition, it is possible to compress
-    the resulting PDF also using tar archiving.
+To obtain the PDF from an fit, simply run
+vp-pdfrename <path-to-fit> <PDF name>. Optional flags allow for the
+resulting pdf to be placed in the LHAPDF directory, as well as modifying
+various fields of the info file. In addition, it is possible to compress
+the resulting PDF also using tar archiving.
 """
 
 import argparse
@@ -18,9 +18,8 @@ import sys
 import tarfile
 import tempfile
 
-import lhapdf
-
 from reportengine import colors
+from validphys.lhaindex import get_lha_datapath
 from validphys.renametools import rename_pdf
 from validphys.utils import yaml_rt
 
@@ -118,7 +117,7 @@ def main():
     log.addHandler(colors.ColorHandler())
 
     if args.lhapdf_path:
-        dest_path = pathlib.Path(lhapdf.paths()[-1]) / pdf_name
+        dest_path = get_lha_datapath() / pdf_name
     else:
         dest_path = source_path.with_name(pdf_name)
 
validphys/tests/conftest.py CHANGED
@@ -9,14 +9,18 @@ import pathlib
 import sys
 
 from hypothesis import settings
-import lhapdf
 import pytest
 
 # Adding this here to change the time of deadline from default (200ms) to 1500ms
 settings.register_profile("extratime", deadline=1500)
 settings.load_profile("extratime")
 
-lhapdf.setVerbosity(0)
+try:
+    import lhapdf
+
+    lhapdf.setVerbosity(0)
+except ModuleNotFoundError:
+    pass
 
 
 # Fortunately py.test works much like reportengine and providers are
validphys/tests/test_hessian2mc.py CHANGED
@@ -1,8 +1,10 @@
+import pathlib
+from unittest import mock
+
 import numpy as np
 import pandas as pd
-from unittest import mock
-from validphys.hessian2mc import write_mc_watt_thorne_replicas, write_hessian_to_mc_watt_thorne
-import pathlib
+
+from validphys.hessian2mc import write_hessian_to_mc_watt_thorne, write_mc_watt_thorne_replicas
 
 
 @mock.patch("validphys.hessian2mc.write_replica")
@@ -40,7 +42,7 @@ def test_write_mc_watt_thorne_replicas(mock_log_info, mock_write_replica):
 @mock.patch("validphys.hessian2mc.rep_matrix")
 @mock.patch("validphys.hessian2mc.write_new_lhapdf_info_file_from_previous_pdf")
 @mock.patch("validphys.hessian2mc.os.makedirs")
-@mock.patch("validphys.hessian2mc.lhapdf.paths")
+@mock.patch("validphys.hessian2mc.get_lha_datapath")
 def test_write_hessian_to_mc_watt_thorne(
     mock_lhapdf_paths,
     mock_makedirs,
@@ -56,7 +58,7 @@ def test_write_hessian_to_mc_watt_thorne(
 
     mock_load_all_replicas.return_value = (None, None)
 
-    mock_lhapdf_paths.return_value = [pathlib.Path("/path/to/lhapdf")]
+    mock_lhapdf_paths.return_value = pathlib.Path("/path/to/lhapdf")
 
     mock_rep_matrix.return_value = np.random.randn(5, 7)  # Mocked replica matrix
 
validphys/utils.py CHANGED
@@ -8,6 +8,7 @@ from ruamel.yaml import YAML
 
 yaml_safe = YAML(typ='safe')
 yaml_rt = YAML(typ='rt')
+yaml_rt.width = 2**31  # to prevent ruamel.yaml introducing linebreaks
 
 
 @contextlib.contextmanager
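Without the width override, ruamel.yaml folds long scalars at roughly 80 columns when round-tripping, which can silently reformat files it rewrites. A small standalone sketch of the effect, not validphys code:

import sys
from ruamel.yaml import YAML

yaml_rt = YAML(typ='rt')
yaml_rt.width = 2**31  # effectively disable line wrapping
yaml_rt.dump({"description": "x" * 200}, sys.stdout)  # stays on one line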